Browse Source

Update LuaJIT to LuaJIT/LuaJIT@bd00094

Miku AuahDark 6 years ago
parent
commit
3b6be41671
100 changed files with 6322 additions and 1070 deletions
  1. 11 0
      love/src/jni/LuaJIT-2.1/.gitignore
  2. 1 1
      love/src/jni/LuaJIT-2.1/COPYRIGHT
  3. 22 15
      love/src/jni/LuaJIT-2.1/Makefile
  4. 2 2
      love/src/jni/LuaJIT-2.1/README
  5. BIN
      love/src/jni/LuaJIT-2.1/android/arm64-v8a/libluajit.a
  6. BIN
      love/src/jni/LuaJIT-2.1/android/armeabi-v7a/libluajit.a
  7. BIN
      love/src/jni/LuaJIT-2.1/android/armeabi/libluajit.a
  8. BIN
      love/src/jni/LuaJIT-2.1/android/x86/libluajit.a
  9. 47 0
      love/src/jni/LuaJIT-2.1/build_instructions.bat
  10. 1 1
      love/src/jni/LuaJIT-2.1/doc/bluequad-print.css
  11. 1 1
      love/src/jni/LuaJIT-2.1/doc/bluequad.css
  12. 83 8
      love/src/jni/LuaJIT-2.1/doc/changes.html
  13. 10 4
      love/src/jni/LuaJIT-2.1/doc/contact.html
  14. 4 5
      love/src/jni/LuaJIT-2.1/doc/ext_c_api.html
  15. 2 3
      love/src/jni/LuaJIT-2.1/doc/ext_ffi.html
  16. 4 3
      love/src/jni/LuaJIT-2.1/doc/ext_ffi_api.html
  17. 4 6
      love/src/jni/LuaJIT-2.1/doc/ext_ffi_semantics.html
  18. 2 3
      love/src/jni/LuaJIT-2.1/doc/ext_ffi_tutorial.html
  19. 3 4
      love/src/jni/LuaJIT-2.1/doc/ext_jit.html
  20. 2 3
      love/src/jni/LuaJIT-2.1/doc/ext_profiler.html
  21. 52 22
      love/src/jni/LuaJIT-2.1/doc/extensions.html
  22. 2 3
      love/src/jni/LuaJIT-2.1/doc/faq.html
  23. 93 127
      love/src/jni/LuaJIT-2.1/doc/install.html
  24. 5 6
      love/src/jni/LuaJIT-2.1/doc/luajit.html
  25. 3 4
      love/src/jni/LuaJIT-2.1/doc/running.html
  26. 13 9
      love/src/jni/LuaJIT-2.1/doc/status.html
  27. 3 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_arm.h
  28. 1 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_arm.lua
  29. 2 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_arm64.h
  30. 1 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_arm64.lua
  31. 9 5
      love/src/jni/LuaJIT-2.1/dynasm/dasm_mips.h
  32. 69 14
      love/src/jni/LuaJIT-2.1/dynasm/dasm_mips.lua
  33. 12 0
      love/src/jni/LuaJIT-2.1/dynasm/dasm_mips64.lua
  34. 2 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_ppc.h
  35. 4 4
      love/src/jni/LuaJIT-2.1/dynasm/dasm_ppc.lua
  36. 1 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_proto.h
  37. 1 1
      love/src/jni/LuaJIT-2.1/dynasm/dasm_x64.lua
  38. 43 10
      love/src/jni/LuaJIT-2.1/dynasm/dasm_x86.h
  39. 176 33
      love/src/jni/LuaJIT-2.1/dynasm/dasm_x86.lua
  40. 2 2
      love/src/jni/LuaJIT-2.1/dynasm/dynasm.lua
  41. 1 1
      love/src/jni/LuaJIT-2.1/etc/luajit.1
  42. 1 1
      love/src/jni/LuaJIT-2.1/etc/luajit.pc
  43. 13 0
      love/src/jni/LuaJIT-2.1/lj_ircall.h.patch
  44. 31 16
      love/src/jni/LuaJIT-2.1/src/Makefile
  45. 19 17
      love/src/jni/LuaJIT-2.1/src/Makefile.dep
  46. 2 2
      love/src/jni/LuaJIT-2.1/src/host/buildvm.c
  47. 1 1
      love/src/jni/LuaJIT-2.1/src/host/buildvm.h
  48. 19 5
      love/src/jni/LuaJIT-2.1/src/host/buildvm_asm.c
  49. 2 2
      love/src/jni/LuaJIT-2.1/src/host/buildvm_fold.c
  50. 1 1
      love/src/jni/LuaJIT-2.1/src/host/buildvm_lib.c
  51. 14 3
      love/src/jni/LuaJIT-2.1/src/host/buildvm_libbc.h
  52. 27 3
      love/src/jni/LuaJIT-2.1/src/host/buildvm_peobj.c
  53. 1 1
      love/src/jni/LuaJIT-2.1/src/host/genlibbc.lua
  54. 6 5
      love/src/jni/LuaJIT-2.1/src/host/genminilua.lua
  55. 1 1
      love/src/jni/LuaJIT-2.1/src/host/minilua.c
  56. 1 1
      love/src/jni/LuaJIT-2.1/src/jit/bc.lua
  57. 9 9
      love/src/jni/LuaJIT-2.1/src/jit/bcsave.lua
  58. 2 2
      love/src/jni/LuaJIT-2.1/src/jit/dis_arm.lua
  59. 1216 0
      love/src/jni/LuaJIT-2.1/src/jit/dis_arm64.lua
  60. 12 0
      love/src/jni/LuaJIT-2.1/src/jit/dis_arm64be.lua
  61. 35 20
      love/src/jni/LuaJIT-2.1/src/jit/dis_mips.lua
  62. 17 0
      love/src/jni/LuaJIT-2.1/src/jit/dis_mips64.lua
  63. 17 0
      love/src/jni/LuaJIT-2.1/src/jit/dis_mips64el.lua
  64. 1 1
      love/src/jni/LuaJIT-2.1/src/jit/dis_mipsel.lua
  65. 2 2
      love/src/jni/LuaJIT-2.1/src/jit/dis_ppc.lua
  66. 1 1
      love/src/jni/LuaJIT-2.1/src/jit/dis_x64.lua
  67. 37 7
      love/src/jni/LuaJIT-2.1/src/jit/dis_x86.lua
  68. 17 12
      love/src/jni/LuaJIT-2.1/src/jit/dump.lua
  69. 3 2
      love/src/jni/LuaJIT-2.1/src/jit/p.lua
  70. 2 2
      love/src/jni/LuaJIT-2.1/src/jit/v.lua
  71. 1 1
      love/src/jni/LuaJIT-2.1/src/jit/zone.lua
  72. 14 20
      love/src/jni/LuaJIT-2.1/src/lauxlib.h
  73. 47 29
      love/src/jni/LuaJIT-2.1/src/lib_aux.c
  74. 38 22
      love/src/jni/LuaJIT-2.1/src/lib_base.c
  75. 1 1
      love/src/jni/LuaJIT-2.1/src/lib_bit.c
  76. 5 5
      love/src/jni/LuaJIT-2.1/src/lib_debug.c
  77. 22 25
      love/src/jni/LuaJIT-2.1/src/lib_ffi.c
  78. 1 1
      love/src/jni/LuaJIT-2.1/src/lib_init.c
  79. 9 11
      love/src/jni/LuaJIT-2.1/src/lib_io.c
  80. 14 4
      love/src/jni/LuaJIT-2.1/src/lib_jit.c
  81. 1 5
      love/src/jni/LuaJIT-2.1/src/lib_math.c
  82. 3 3
      love/src/jni/LuaJIT-2.1/src/lib_os.c
  83. 37 24
      love/src/jni/LuaJIT-2.1/src/lib_package.c
  84. 1 5
      love/src/jni/LuaJIT-2.1/src/lib_string.c
  85. 21 1
      love/src/jni/LuaJIT-2.1/src/lib_table.c
  86. 3 3
      love/src/jni/LuaJIT-2.1/src/lj.supp
  87. 178 86
      love/src/jni/LuaJIT-2.1/src/lj_alloc.c
  88. 97 18
      love/src/jni/LuaJIT-2.1/src/lj_api.c
  89. 115 32
      love/src/jni/LuaJIT-2.1/src/lj_arch.h
  90. 200 65
      love/src/jni/LuaJIT-2.1/src/lj_asm.c
  91. 1 1
      love/src/jni/LuaJIT-2.1/src/lj_asm.h
  92. 44 49
      love/src/jni/LuaJIT-2.1/src/lj_asm_arm.h
  93. 2043 0
      love/src/jni/LuaJIT-2.1/src/lj_asm_arm64.h
  94. 492 92
      love/src/jni/LuaJIT-2.1/src/lj_asm_mips.h
  95. 298 62
      love/src/jni/LuaJIT-2.1/src/lj_asm_ppc.h
  96. 428 78
      love/src/jni/LuaJIT-2.1/src/lj_asm_x86.h
  97. 1 1
      love/src/jni/LuaJIT-2.1/src/lj_bc.c
  98. 1 1
      love/src/jni/LuaJIT-2.1/src/lj_bc.h
  99. 1 1
      love/src/jni/LuaJIT-2.1/src/lj_bcdump.h
  100. 1 1
      love/src/jni/LuaJIT-2.1/src/lj_bcread.c

+ 11 - 0
love/src/jni/LuaJIT-2.1/.gitignore

@@ -0,0 +1,11 @@
+*.[oa]
+*.so
+*.obj
+*.lib
+*.exp
+*.dll
+*.exe
+*.manifest
+*.dmp
+*.swp
+.tags

+ 1 - 1
love/src/jni/LuaJIT-2.1/COPYRIGHT

@@ -1,7 +1,7 @@
 ===============================================================================
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 
-Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

+ 22 - 15
love/src/jni/LuaJIT-2.1/Makefile

@@ -10,13 +10,13 @@
 # For MSVC, please follow the instructions given in src/msvcbuild.bat.
 # For MinGW and Cygwin, cd to src and run make with the Makefile there.
 #
-# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 
 MAJVER=  2
 MINVER=  1
 RELVER=  0
-PREREL=  -beta1
+PREREL=  -beta3
 VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
 ABIVER=  5.1
 
@@ -47,17 +47,18 @@ INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig
 INSTALL_TNAME= luajit-$(VERSION)
 INSTALL_TSYMNAME= luajit
 INSTALL_ANAME= libluajit-$(ABIVER).a
-INSTALL_SONAME= libluajit-$(ABIVER).so.$(MAJVER).$(MINVER).$(RELVER)
-INSTALL_SOSHORT= libluajit-$(ABIVER).so
-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib
+INSTALL_SOSHORT1= libluajit-$(ABIVER).so
+INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER)
+INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER)
 INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib
 INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib
+INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib
 INSTALL_PCNAME= luajit.pc
 
 INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME)
 INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME)
-INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT)
-INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT)
+INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1)
+INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2)
 INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME)
 INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME)
 INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
@@ -85,16 +86,22 @@ FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
-	      dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \
-	      dis_mips.lua dis_mipsel.lua vmdef.lua
+	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
+	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
+	      dis_mips64.lua dis_mips64el.lua vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
-  ifeq (Darwin,$(shell uname -s))
-    INSTALL_SONAME= $(INSTALL_DYLIBNAME)
-    INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_DYLIBSHORT1)
-    INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_DYLIBSHORT2)
-    LDCONFIG= :
-  endif
+  HOST_SYS:= $(shell uname -s)
+else
+  HOST_SYS= Windows
+endif
+TARGET_SYS?= $(HOST_SYS)
+
+ifeq (Darwin,$(TARGET_SYS))
+  INSTALL_SONAME= $(INSTALL_DYLIBNAME)
+  INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1)
+  INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2)
+  LDCONFIG= :
 endif
 
 ##############################################################################

+ 2 - 2
love/src/jni/LuaJIT-2.1/README

@@ -1,11 +1,11 @@
-README for LuaJIT 2.1.0-beta1
+README for LuaJIT 2.1.0-beta3
 -----------------------------
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
 Project Homepage: http://luajit.org/
 
-LuaJIT is Copyright (C) 2005-2015 Mike Pall.
+LuaJIT is Copyright (C) 2005-2017 Mike Pall.
 LuaJIT is free software, released under the MIT license.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 

BIN
love/src/jni/LuaJIT-2.1/android/arm64-v8a/libluajit.a


BIN
love/src/jni/LuaJIT-2.1/android/armeabi-v7a/libluajit.a


BIN
love/src/jni/LuaJIT-2.1/android/armeabi/libluajit.a


BIN
love/src/jni/LuaJIT-2.1/android/x86/libluajit.a


+ 47 - 0
love/src/jni/LuaJIT-2.1/build_instructions.bat

@@ -0,0 +1,47 @@
+rem Build instructions assume WSL + Clang for Windows (with the MSVC x86+x64 toolset for the -m32 switch).
+rem This assumes NDK r19 or later. See https://github.com/LuaJIT/LuaJIT/issues/477 for more information.
+rem Make sure the prebuilt toolchain is already in your PATH environment variable.
+
+mkdir android\arm64-v8a
+mkdir android\armeabi-v7a
+mkdir android\x86
+
+rem Reset the error level left behind by mkdir when a directory already exists
+ver > nul
+
+rem ARMv8 - "if errorlevel 1" is tested at run time; the ERRORLEVEL variable would be expanded before the block runs
+if not exist android\arm64-v8a\libluajit.a (
+	wsl make clean
+	if errorlevel 1 goto :error
+	wsl make HOST_LUA=luajit HOST_CC=clang.exe CC=clang CROSS=aarch64-linux-android- STATIC_CC=aarch64-linux-android21-clang "DYNAMIC_CC=aarch64-linux-android21-clang -fPIC" "TARGET_AR=aarch64-linux-android-ar.exe rcus" TARGET_LD=aarch64-linux-android21-clang TARGET_STRIP=aarch64-linux-android-strip.exe amalg
+	if errorlevel 1 goto :error
+	copy src\libluajit.a android\arm64-v8a\libluajit.a
+	if errorlevel 1 goto :error
+)
+
+rem ARMv7 - the host compiler is forced to 32 bit via -m32
+if not exist android\armeabi-v7a\libluajit.a (
+	wsl make clean
+	if errorlevel 1 goto :error
+	wsl make HOST_LUA=luajit "HOST_CC=clang.exe -m32" CC=clang CROSS=arm-linux-android- STATIC_CC=armv7a-linux-androideabi16-clang "DYNAMIC_CC=armv7a-linux-androideabi16-clang -fPIC" "TARGET_AR=arm-linux-androideabi-ar.exe rcus" TARGET_LD=armv7a-linux-androideabi16-clang TARGET_STRIP=arm-linux-androideabi-strip.exe amalg
+	if errorlevel 1 goto :error
+	copy src\libluajit.a android\armeabi-v7a\libluajit.a
+	if errorlevel 1 goto :error
+)
+
+rem x86 - the host compiler is forced to 32 bit via -m32
+if not exist android\x86\libluajit.a (
+	wsl make clean
+	if errorlevel 1 goto :error
+	wsl make HOST_LUA=luajit "HOST_CC=clang.exe -m32" CC=clang CROSS=i686-linux-android- STATIC_CC=i686-linux-android16-clang "DYNAMIC_CC=i686-linux-android16-clang -fPIC" "TARGET_AR=i686-linux-android-ar.exe rcus" TARGET_LD=i686-linux-android16-clang TARGET_STRIP=i686-linux-android-strip.exe amalg
+	if errorlevel 1 goto :error
+	copy src\libluajit.a android\x86\libluajit.a
+	if errorlevel 1 goto :error
+)
+
+goto :done
+
+:error
+exit /b 1
+
+:done

+ 1 - 1
love/src/jni/LuaJIT-2.1/doc/bluequad-print.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2015 Mike Pall.
+/* Copyright (C) 2004-2018 Mike Pall.
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 1 - 1
love/src/jni/LuaJIT-2.1/doc/bluequad.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2015 Mike Pall.
+/* Copyright (C) 2004-2018 Mike Pall.
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 83 - 8
love/src/jni/LuaJIT-2.1/doc/changes.html

@@ -3,8 +3,7 @@
 <head>
 <title>LuaJIT Change History</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -65,7 +64,7 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <div id="main">
 <p>
 This is a list of changes between the released versions of LuaJIT.<br>
-The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.4</strong>.<br>
+The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.5</strong>.<br>
 </p>
 <p>
 Please check the
@@ -74,6 +73,43 @@ to see whether newer versions are available.
 </p>
 
 <div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 &mdash; 2017-05-01</h2>
+<ul>
+<li>Rewrite memory block allocator.</li>
+<li>Add various extensions from Lua 5.2/5.3.</li>
+<li>Remove old Lua 5.0 compatibility defines.</li>
+<li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+<li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li>
+<li>Fix soft-float <tt>math.abs()</tt> and negation.</li>
+<li>Fix formatting of some small denormals at low precision.</li>
+<li>LJ_GC64: Add JIT compiler support.</li>
+<li>x64/LJ_GC64: Add JIT compiler backend.</li>
+<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li>
+<li>Windows/x86: Add full exception interoperability.</li>
+<li>ARM64: Add big-endian support.</li>
+<li>ARM64: Add JIT compiler backend.</li>
+<li>MIPS: Fix <tt>TSETR</tt> barrier.</li>
+<li>MIPS: Support MIPS16 interlinking.</li>
+<li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li>
+<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li>
+<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li>
+<li>FFI: Compile bitfield loads/stores.</li>
+<li>Various fixes common with the 2.0 branch.</li>
+</ul>
+
+<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 &mdash; 2016-03-03</h2>
+<ul>
+<li>Enable trace stitching.</li>
+<li>Use internal implementation for converting FP numbers to strings.</li>
+<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li>
+<li>Add MIPS soft-float support.</li>
+<li>Switch MIPS port to dual-number mode.</li>
+<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
+<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
+<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li>
+<li>Various minor fixes.</li>
+</ul>
+
 <h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 &mdash; 2015-08-25</h2>
 <p>
 This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
@@ -86,12 +122,11 @@ Please take a look at the commit history for more details.
 <li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li>
 <li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li>
 <li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li>
-<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li>
 <li>Parse binary number literals (<tt>0bxxx</tt>).</li>
 </ul></li>
 <li>Improvements to the JIT compiler:
 <ul>
-<li>Add trace stitching.</li>
+<li>Add trace stitching (disabled for now).</li>
 <li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
 <li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
 <li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
@@ -113,7 +148,6 @@ Please take a look at the commit history for more details.
 <li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li>
 <li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
 <li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
-<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
 <li>PPC/e500: Drop support for this architecture.</li>
 </ul></li>
 <li>FFI library:
@@ -124,12 +158,53 @@ Please take a look at the commit history for more details.
 <li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li>
 <li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
 <li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
-<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
 </ul></li>
 </ul>
 </div>
 
 <div class="major" style="background: #ffffd0;">
+<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 &mdash; 2017-05-01</h2>
+<ul>
+<li>Add workaround for MSVC 2015 stdio changes.</li>
+<li>Limit mcode alloc probing, depending on the available pool size.</li>
+<li>Fix overly restrictive range calculation in mcode allocation.</li>
+<li>Fix out-of-scope goto handling in parser.</li>
+<li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li>
+<li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li>
+<li>Fix GC step size calculation.</li>
+<li>Initialize <tt>uv-&gt;immutable</tt> for upvalues of loaded chunks.</li>
+<li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li>
+<li>Drop leftover regs in 'for' iterator assignment, too.</li>
+<li>Fix PHI remarking in SINK pass.</li>
+<li>Don't try to record outermost <tt>pcall()</tt> return to lower frame.</li>
+<li>Add guard for obscure aliasing between open upvalues and SSA slots.</li>
+<li>Remove assumption that <tt>lj_math_random_step()</tt> doesn't clobber FPRs.</li>
+<li>Fix handling of non-numeric strings in arithmetic coercions.</li>
+<li>Fix recording of <tt>select(n, ...)</tt> with off-trace varargs.</li>
+<li>Fix install for cross-builds.</li>
+<li>Don't allocate unused 2nd result register in JIT compiler backend.</li>
+<li>Drop marks from replayed instructions when sinking.</li>
+<li>Fix unsinking check.</li>
+<li>Properly handle OOM in <tt>trace_save()</tt>.</li>
+<li>Limit number of arguments given to <tt>io.lines()</tt> and <tt>fp:lines()</tt>.</li>
+<li>Fix narrowing of <tt>TOBIT</tt>.</li>
+<li>OSX: Fix build with recent XCode.</li>
+<li>x86/x64: Don't spill an explicit <tt>REF_BASE</tt> in the IR.</li>
+<li>x86/x64: Fix instruction length decoder.</li>
+<li>x86/x64: Search for exit jumps with instruction length decoder.</li>
+<li>ARM: Fix <tt>BLX</tt> encoding for Thumb interworking calls.</li>
+<li>MIPS: Don't use <tt>RID_GP</tt> as a scratch register.</li>
+<li>MIPS: Fix emitted code for U32 to float conversion.</li>
+<li>MIPS: Backport workaround for compact unwind tables.</li>
+<li>MIPS: Fix cross-endian jit.bcsave.</li>
+<li>MIPS: Fix <tt>BC_ISNEXT</tt> fallback path.</li>
+<li>MIPS: Fix use of ffgccheck delay slots in interpreter.</li>
+<li>FFI: Fix FOLD rules for <tt>int64_t</tt> comparisons.</li>
+<li>FFI: Fix SPLIT pass for <tt>CONV i64.u64</tt>.</li>
+<li>FFI: Fix <tt>ipairs()</tt> recording.</li>
+<li>FFI: Don't propagate qualifiers into subtypes of complex.</li>
+</ul>
+
 <h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 &mdash; 2015-05-14</h2>
 <ul>
 <li>Fix stack check in narrowing optimization.</li>
@@ -797,7 +872,7 @@ no point in listing differences over earlier versions.</li>
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 10 - 4
love/src/jni/LuaJIT-2.1/doc/contact.html

@@ -3,8 +3,7 @@
 <head>
 <title>Contact</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -61,8 +60,15 @@
 </div>
 <div id="main">
 <p>
+If you want to report bugs, propose fixes or suggest enhancements,
+please use the
+<a href="https://github.com/LuaJIT/LuaJIT/issues">GitHub issue tracker</a>.
+</p>
+<p>
 Please send general questions to the
 <a href="http://luajit.org/list.html"><span class="ext">&raquo;</span>&nbsp;LuaJIT mailing list</a>.
+</p>
+<p>
 You can also send any questions you have directly to me:
 </p>
 
@@ -86,7 +92,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
 <h2>Copyright</h2>
 <p>
 All documentation is
-Copyright &copy; 2005-2015 Mike Pall.
+Copyright &copy; 2005-2018 Mike Pall.
 </p>
 
 
@@ -94,7 +100,7 @@ Copyright &copy; 2005-2015 Mike Pall.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 4 - 5
love/src/jni/LuaJIT-2.1/doc/ext_c_api.html

@@ -3,8 +3,7 @@
 <head>
 <title>Lua/C API Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -91,8 +90,8 @@ other Lua/C API functions).
 </p>
 <p>
 The third argument specifies the mode, which is 'or'ed with a flag.
-The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
-<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off,
+<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or
 <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
 </p>
 <p>
@@ -179,7 +178,7 @@ Also note that this mechanism is not without overhead.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 2 - 3
love/src/jni/LuaJIT-2.1/doc/ext_ffi.html

@@ -3,8 +3,7 @@
 <head>
 <title>FFI Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -322,7 +321,7 @@ without undue conversion penalties.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 4 - 3
love/src/jni/LuaJIT-2.1/doc/ext_ffi_api.html

@@ -3,8 +3,7 @@
 <head>
 <title>ffi.* API Functions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -469,6 +468,8 @@ otherwise. The following parameters are currently defined:
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
 <tr class="even">
+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<tr class="odd">
 <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 
@@ -560,7 +561,7 @@ named <tt>i</tt>.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 4 - 6
love/src/jni/LuaJIT-2.1/doc/ext_ffi_semantics.html

@@ -3,8 +3,7 @@
 <head>
 <title>FFI Semantics</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -864,7 +863,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead.
 <p>
 The main use for parameterized types are libraries implementing abstract
 data types
-(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">&raquo;</span>&nbsp;example</a>),
+(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8">example</a>),
 similar to what can be achieved with C++ template metaprogramming.
 Another use case are derived types of anonymous structs, which avoids
 pollution of the global struct namespace.
@@ -1221,13 +1220,12 @@ The following operations are currently not compiled and may exhibit
 suboptimal performance, especially when used in inner loops:
 </p>
 <ul>
-<li>Bitfield accesses and initializations.</li>
 <li>Vector operations.</li>
 <li>Table initializers.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
 <li>Non-default initialization of VLA/VLS or large C&nbsp;types
 (&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
-<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
+<li>Bitfield initializations.</li>
 <li>Pointer differences for element sizes that are not a power of
 two.</li>
 <li>Calls to C&nbsp;functions with aggregates passed or returned by
@@ -1253,7 +1251,7 @@ compiled.</li>
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 2 - 3
love/src/jni/LuaJIT-2.1/doc/ext_ffi_tutorial.html

@@ -3,8 +3,7 @@
 <head>
 <title>FFI Tutorial</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -593,7 +592,7 @@ it to a local variable in the function scope is unnecessary.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 3 - 4
love/src/jni/LuaJIT-2.1/doc/ext_jit.html

@@ -3,8 +3,7 @@
 <head>
 <title>jit.* Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -153,7 +152,7 @@ Contains the target OS name:
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <p>
 Contains the target architecture name:
-"x86", "x64", "arm", "ppc", or "mips".
+"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
 </p>
 
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
@@ -191,7 +190,7 @@ if you want to know more.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 2 - 3
love/src/jni/LuaJIT-2.1/doc/ext_profiler.html

@@ -3,8 +3,7 @@
 <head>
 <title>Profiler</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -355,7 +354,7 @@ use.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 52 - 22
love/src/jni/LuaJIT-2.1/doc/extensions.html

@@ -3,8 +3,7 @@
 <head>
 <title>Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -209,8 +208,8 @@ bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 </p>
 <p>
 Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
-a different, incompatible bytecode format for ports that use this mode (e.g.
-ARM64). This may be rectified in the future.
+a different, incompatible bytecode format for all 64 bit ports. This may be
+rectified in the future.
 </p>
 
 <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
@@ -253,6 +252,10 @@ for every call. The result is uniformly distributed between 0.0 and 1.0.
 It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
 preserve uniformity.
 </p>
+<p>
+Important: Neither this nor any other PRNG based on the simplistic
+<tt>math.random()</tt> API is suitable for cryptographic use.
+</p>
 
 <h3 id="io"><tt>io.*</tt> functions handle 64&nbsp;bit file offsets</h3>
 <p>
@@ -291,8 +294,8 @@ enabled:
 <li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
 <li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
 <li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
-<li><tt>math.log(x [,base])</tt>.
-<li><tt>string.rep(s, n [,sep])</tt>.
+<li><tt>math.log(x [,base])</tt>.</li>
+<li><tt>string.rep(s, n [,sep])</tt>.</li>
 <li><tt>string.format()</tt>: <tt>%q</tt> reversible.
 <tt>%s</tt> checks <tt>__tostring</tt>.
 <tt>%a</tt> and <tt>%A</tt> added.</li>
@@ -311,6 +314,26 @@ indexes for varargs.</li>
 <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
 C&nbsp;functions.</li>
 <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
+<li>Lua/C API extensions:
+<tt>lua_version()</tt>
+<tt>lua_upvalueid()</tt>
+<tt>lua_upvaluejoin()</tt>
+<tt>lua_loadx()</tt>
+<tt>lua_copy()</tt>
+<tt>lua_tonumberx()</tt>
+<tt>lua_tointegerx()</tt>
+<tt>luaL_fileresult()</tt>
+<tt>luaL_execresult()</tt>
+<tt>luaL_loadfilex()</tt>
+<tt>luaL_loadbufferx()</tt>
+<tt>luaL_traceback()</tt>
+<tt>luaL_setfuncs()</tt>
+<tt>luaL_pushmodule()</tt>
+<tt>luaL_newlibtable()</tt>
+<tt>luaL_newlib()</tt>
+<tt>luaL_testudata()</tt>
+<tt>luaL_setmetatable()</tt>
+</li>
 <li>Command line option <tt>-E</tt>.</li>
 <li>Command line checks <tt>__tostring</tt> for errors.</li>
 </ul>
@@ -335,7 +358,9 @@ instead of <tt>true</tt>.</li>
 exit status.</li>
 <li><tt>debug.setmetatable()</tt> returns object.</li>
 <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
-<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.
+<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
+<li><tt>package.searchers</tt>.</li>
+<li><tt>module()</tt> returns the module table.</li>
 </ul>
 <p>
 Note: this provides only partial compatibility with Lua 5.2 at the
@@ -349,6 +374,14 @@ break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
 LuaJIT supports some extensions from Lua&nbsp;5.3:
 <ul>
 <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
+<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li>
+<li><tt>assert()</tt> accepts any type of error object.</li>
+<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
+<li><tt>coroutine.isyieldable()</tt>.</li>
+<li>Lua/C API extensions:
+<tt>lua_isyieldable()</tt>
+</li>
 </ul>
 
 <h2 id="exceptions">C++ Exception Interoperability</h2>
@@ -365,25 +398,30 @@ the toolchain used to compile LuaJIT:
 </tr>
 <tr class="odd separate">
 <td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+</td>
+<td class="exccompiler">GCC 4.3+, Clang</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 <tr class="even">
+<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
+<td class="exccompiler">GCC, Clang</td>
+<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+</tr>
+<tr class="odd">
 <td class="excplatform">Other platforms, DWARF2 unwinding</td>
-<td class="exccompiler">GCC</td>
+<td class="exccompiler">GCC, Clang</td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
-<tr class="odd">
+<tr class="even">
 <td class="excplatform">Windows/x64</td>
 <td class="exccompiler">MSVC or WinSDK</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
-<tr class="even">
+<tr class="odd">
 <td class="excplatform">Windows/x86</td>
 <td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #a00000;">No</b></td>
+<td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
-<tr class="odd">
+<tr class="even">
 <td class="excplatform">Other platforms</td>
 <td class="exccompiler">Other compilers</td>
 <td class="excinterop"><b style="color: #a00000;">No</b></td>
@@ -432,20 +470,12 @@ C++ destructors.</li>
 <li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
 <li>Throwing Lua errors across C++ frames will <b>not</b> call
 C++ destructors.</li>
-<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
-it's <b>not</b> safe to throw a Lua error across any frames containing
-a C++ function with any try/catch construct or using variables with
-(implicit) destructors. This also applies to any functions which may be
-inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
-is called inside or outside of a try/catch or whether any object actually
-needs to be destroyed: the SEH chain is corrupted and this will eventually
-lead to the termination of the process.</li>
 </ul>
 <br class="flush">
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 2 - 3
love/src/jni/LuaJIT-2.1/doc/faq.html

@@ -3,8 +3,7 @@
 <head>
 <title>Frequently Asked Questions (FAQ)</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -176,7 +175,7 @@ the development of certain features, if they are important to you.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 93 - 127
love/src/jni/LuaJIT-2.1/doc/install.html

@@ -3,8 +3,7 @@
 <head>
 <title>Installation</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -117,14 +116,14 @@ operating systems, CPUs and compilers:
 <td class="compatos">GCC 4.2+</td>
 <td class="compatos">GCC 4.2+</td>
 <td class="compatos">XCode 5.0+<br>Clang</td>
-<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
+<td class="compatos">MSVC<br>MinGW, Cygwin</td>
 </tr>
 <tr class="even">
 <td class="compatcpu">x64 (64 bit)</td>
 <td class="compatos">GCC 4.2+</td>
-<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
+<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
 <td class="compatos">XCode 5.0+<br>Clang</td>
-<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
+<td class="compatos">MSVC<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
 </tr>
 <tr class="odd">
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
@@ -148,7 +147,7 @@ operating systems, CPUs and compilers:
 <td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 <tr class="even">
-<td class="compatcpu"><a href="#cross2">MIPS</a></td>
+<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos compatno">&nbsp;</td>
@@ -169,12 +168,19 @@ only).</li>
 <li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
 under POSIX, MinGW or Cygwin.</li>
 <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
-MSVC or WinSDK.</li>
+MSVC (Visual Studio).</li>
 </ul>
 <p>
 Please read the instructions given in these files, before changing
 any settings.
 </p>
+<p>
+All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>).
+For x64, you can select the old 32-on-64 bit mode by adding
+<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
+Please check the note about the
+<a href="extensions.html#string_dump">bytecode format</a> differences, too.
+</p>
 
 <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
 <h3>Prerequisites</h3>
@@ -190,8 +196,8 @@ open a terminal window and change to this directory. Now unpack the archive
 and change to the newly created directory:
 </p>
 <pre class="code">
-tar zxf LuaJIT-2.0.4.tar.gz
-cd LuaJIT-2.0.4</pre>
+tar zxf LuaJIT-2.0.5.tar.gz
+cd LuaJIT-2.0.5</pre>
 <h3>Building LuaJIT</h3>
 <p>
 The supplied Makefiles try to auto-detect the settings needed for your
@@ -202,7 +208,7 @@ which is probably the default on your system, anyway. Simply run:
 make
 </pre>
 <p>
-This always builds a native x86, x64 or PPC binary, depending on the host OS
+This always builds a native binary, depending on the host OS
 you're running this command on. Check the section on
 <a href="#cross">cross-compilation</a> for more options.
 </p>
@@ -215,8 +221,8 @@ You can add an extra prefix to the search paths by appending the
 make PREFIX=/home/myself/lj2
 </pre>
 <p>
-Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment
-variable is not set, then it's forced to <tt>10.4</tt>.
+Note for OSX: you must set the <tt>MACOSX_DEPLOYMENT_TARGET</tt>
+environment variable to a value supported by your toolchain.
 </p>
 <h3>Installing LuaJIT</h3>
 <p>
@@ -246,17 +252,7 @@ Either install one of the open source SDKs
 (<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
 <a href="http://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>), which come with a modified
 GCC plus the required development headers.
-</p>
-<p>
-Or install Microsoft's Visual C++ (MSVC). The freely downloadable
-<a href="http://www.microsoft.com/Express/VC/"><span class="ext">&raquo;</span>&nbsp;Express Edition</a>
-works just fine, but only contains an x86 compiler.
-</p>
-<p>
-The freely downloadable
-<a href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx"><span class="ext">&raquo;</span>&nbsp;Windows SDK</a>
-only comes with command line tools, but this is all you need to build LuaJIT.
-It contains x86 and x64 compilers.
+Or install Microsoft's Visual Studio (MSVC).
 </p>
 <p>
 Next, download the source package and unpack it using an archive manager
@@ -264,7 +260,7 @@ Next, download the source package and unpack it using an archive manager
 </p>
 <h3>Building with MSVC</h3>
 <p>
-Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the
+Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
 directory where you've unpacked the sources and run these commands:
 </p>
 <pre class="code">
@@ -272,30 +268,7 @@ cd src
 msvcbuild
 </pre>
 <p>
-Then follow the installation instructions below.
-</p>
-<h3>Building with the Windows SDK</h3>
-<p>
-Open a "Windows SDK Command Shell" and select the x86 compiler:
-</p>
-<pre class="code">
-setenv /release /x86
-</pre>
-<p>
-Or select the x64 compiler:
-</p>
-<pre class="code">
-setenv /release /x64
-</pre>
-<p>
-Then <tt>cd</tt> to the directory where you've unpacked the sources
-and run these commands:
-</p>
-<pre class="code">
-cd src
-msvcbuild
-</pre>
-<p>
+Check the <tt>msvcbuild.bat</tt> file for more options.
 Then follow the installation instructions below.
 </p>
 <h3>Building with MinGW or Cygwin</h3>
@@ -333,22 +306,36 @@ directory where <tt>luajit.exe</tt> is installed
 
 <h2 id="cross">Cross-compiling LuaJIT</h2>
 <p>
+First, let's clear up some terminology:
+</p>
+<ul>
+<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
+<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
+<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
+<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
+<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
+</ul>
+<p>
 The GNU Makefile-based build system allows cross-compiling on any host
-for any supported target, as long as both architectures have the same
-pointer size. If you want to cross-compile to any 32 bit target on an
-x64 OS, you need to install the multilib development package (e.g.
-<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
-(<tt>HOST_CC="gcc -m32"</tt>).
+for any supported target:
 </p>
+<ul>
+<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
+<li>Both host and target architectures must have the same pointer size.</li>
+<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
+<li>64 bit targets always require compilation on a 64 bit host.</li>
+</ul>
 <p>
 You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
-target OS differ, or you'll get assembler or linker errors. E.g. if
-you're compiling on a Windows or OSX host for embedded Linux or Android,
-you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
-minimal target OS, you may need to disable the built-in allocator in
-<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. The examples
-below only show some popular targets &mdash; please check the comments
-in <tt>src/Makefile</tt> for more details.
+target OS differ, or you'll get assembler or linker errors:
+</p>
+<ul>
+<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
+<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
+<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
+</ul>
+<p>
+Here are some examples where host and target have the same CPU:
 </p>
 <pre class="code">
 # Cross-compile to a 32 bit binary on a multilib x64 OS
@@ -366,83 +353,71 @@ use the canonical toolchain triplets for Linux.
 </p>
 <p>
 Since there's often no easy way to detect CPU features at runtime, it's
-important to compile with the proper CPU or architecture settings. You
-can specify these when building the toolchain yourself. Or add
-<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For
-ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,
-too. Otherwise LuaJIT may not run at the full performance of your target
-CPU.
+important to compile with the proper CPU or architecture settings:
+</p>
+<ul>
+<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
+<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
+<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
+<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
+</ul>
+<p>
+Here are some examples for targets with a different CPU than the host:
 </p>
 <pre class="code">
 # ARM soft-float
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
      TARGET_CFLAGS="-mfloat-abi=soft"
 
-# ARM soft-float ABI with VFP (example for Cortex-A8)
+# ARM soft-float ABI with VFP (example for Cortex-A9)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
-     TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
+     TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
 
-# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
+# ARM hard-float ABI with VFP (armhf, most modern toolchains)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 
-# ARM64 (requires x64 host)
+# ARM64
 make CROSS=aarch64-linux-
 
 # PPC
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 
-# MIPS big-endian
+# MIPS32 big-endian
 make HOST_CC="gcc -m32" CROSS=mips-linux-
-# MIPS little-endian
+# MIPS32 little-endian
 make HOST_CC="gcc -m32" CROSS=mipsel-linux-
+
+# MIPS64 big-endian
+make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
+# MIPS64 little-endian
+make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
 </pre>
 <p>
-You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
-The environment variables need to match the install locations and the
-desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14.
-For details check the folder <tt>docs</tt> in the NDK directory.
-</p>
-<p>
-Only a few common variations for the different CPUs, ABIs and platforms
-are listed. Please use your own judgement for which combination you want
-to build/deploy or which lowest common denominator you want to pick:
+You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
+Please adapt the environment variables to match the install locations and the
+desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16.
 </p>
 <pre class="code">
-# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
-NDK=/opt/android/ndk
-NDKABI=8
-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
-
-# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
-NDK=/opt/android/ndk
-NDKABI=14
-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
-NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
-
-# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS)
-NDK=/opt/android/ndk
-NDKABI=14
-NDKVER=$NDK/toolchains/mipsel-linux-android-4.6
-NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+# Android/ARM64, aarch64, Android 5.0+ (L)
+NDKDIR=/opt/android/ndk
+NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
+NDKCROSS=$NDKBIN/aarch64-linux-android-
+NDKCC=$NDKBIN/aarch64-linux-android21-clang
+make CROSS=$NDKCROSS \
+     STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
+     TARGET_LD=$NDKCC
 
-# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
-NDK=/opt/android/ndk
-NDKABI=14
-NDKVER=$NDK/toolchains/x86-4.6
-NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
+NDKDIR=/opt/android/ndk
+NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
+NDKCROSS=$NDKBIN/arm-linux-androideabi-
+NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang
+make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
+     STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
+     TARGET_LD=$NDKCC
 </pre>
 <p>
-You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
+You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
 </p>
 <p style="font-size: 8pt;">
 Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
@@ -452,18 +427,12 @@ much slower than the JIT compiler. Please complain to Apple, not me.
 Or use Android. :-p
 </p>
 <pre class="code">
-# iOS/ARM (32 bit)
-ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
-ICC=$(xcrun --sdk iphoneos --find clang)
-ISDKF="-arch armv7 -isysroot $ISDKP"
-make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \
-     TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
-
 # iOS/ARM64
 ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
 ICC=$(xcrun --sdk iphoneos --find clang)
 ISDKF="-arch arm64 -isysroot $ISDKP"
-make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
+     TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
 </pre>
 
 <h3 id="consoles">Cross-compiling for consoles</h3>
@@ -560,14 +529,11 @@ intend to load Lua/C modules at runtime.
 </li>
 <li>
 If you're building a 64 bit application on OSX which links directly or
-indirectly against LuaJIT, you need to link your main executable
-with these flags:
+indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode,
+you need to link your main executable with these flags:
 <pre class="code">
 -pagezero_size 10000 -image_base 100000000
 </pre>
-Also, it's recommended to <tt>rebase</tt> all (self-compiled) shared libraries
-which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua).
-See: <tt>man rebase</tt>
 </li>
 </ul>
 <p>Additional hints for initializing LuaJIT using the C API functions:</p>
@@ -653,7 +619,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 5 - 6
love/src/jni/LuaJIT-2.1/doc/luajit.html

@@ -3,8 +3,7 @@
 <head>
 <title>LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -152,7 +151,7 @@ Lua is a powerful, dynamic and light-weight programming language.
 It may be embedded or used as a general-purpose, stand-alone language.
 </p>
 <p>
-LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
+LuaJIT is Copyright &copy; 2005-2018 Mike Pall, released under the
 <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 </p>
 <p>
@@ -169,10 +168,10 @@ LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
 <tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
 </table>
 <table class="feature compiler">
-<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
+<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
 </table>
 <table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
+<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
 </table>
 <table class="feature fcompat">
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
@@ -226,7 +225,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 3 - 4
love/src/jni/LuaJIT-2.1/doc/running.html

@@ -3,8 +3,7 @@
 <head>
 <title>Running LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -189,7 +188,7 @@ itself. For a description of their options and output format, please
 read the comment block at the start of their source.
 They can be found in the <tt>lib</tt> directory of the source
 distribution or installed under the <tt>jit</tt> directory. By default
-this is <tt>/usr/local/share/luajit-2.0.4/jit</tt> on POSIX
+this is <tt>/usr/local/share/luajit-2.0.5/jit</tt> on POSIX
 systems.
 </p>
 
@@ -299,7 +298,7 @@ Here are the parameters and their default settings:
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 13 - 9
love/src/jni/LuaJIT-2.1/doc/status.html

@@ -3,8 +3,7 @@
 <head>
 <title>Status</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2018">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -91,24 +90,29 @@ hooks for non-Lua functions) and shows slightly different behavior
 in LuaJIT (no per-coroutine hooks, no tail call counting).
 </li>
 <li>
-Some checks are missing in the JIT-compiled code for obscure situations
-with <b>open upvalues aliasing</b> one of the SSA slots later on (or
-vice versa). Bonus points, if you can find a real world test case for
-this.
-</li>
-<li>
 Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
 handled correctly. The error may fall through an on-trace
 <tt>pcall</tt> or it may be passed on to the function set with
 <tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
 garbage collector.
 </li>
+<li>
+LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the
+<b>legacy <tt>lightuserdata</tt></b> data type.
+This is only relevant on x64 systems which use the negative part of the
+virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems
+configured with a 48 bit or 52 bit VA.
+Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside
+of that range, e.g. variables on the stack. In general, avoid this data
+type for new code and replace it with (much more performant) FFI bindings.
+FFI cdata pointers can address the full 64 bit range.
+</li>
 </ul>
 <br class="flush">
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2015 Mike Pall
+Copyright &copy; 2005-2018
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 3 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_arm.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM ARM encoding engine.
-** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_IMMV8:
 	CK((n & 3) == 0, RANGE_I);
 	n >>= 2;
+	/* fallthrough */
       case DASM_IMML8:
       case DASM_IMML12:
 	CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;

+ 1 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_arm.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM ARM module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 

+ 2 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_arm64.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM ARM64 encoding engine.
-** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -427,6 +427,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;

+ 1 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_arm64.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM ARM64 module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 

+ 9 - 5
love/src/jni/LuaJIT-2.1/dynasm/dasm_mips.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM MIPS encoding engine.
-** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
   DASM__MAX
 };
 
@@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
 	*pl = -pos;  /* Label exists now. */
 	b[pos++] = ofs;  /* Store pass1 offset estimate. */
 	break;
-      case DASM_IMM:
+      case DASM_IMM: case DASM_IMMS:
 #ifdef DASM_CHECKS
 	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
 #endif
@@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMS: pos++; break;
 	}
       }
       stop: (void)0;
@@ -350,13 +350,14 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n);
 	  if (ins & 2048)
 	    n = n - (int)((char *)cp - base);
 	  else
-	    n = (n + (int)base) & 0x0fffffff;
+	    n = (n + (int)(size_t)base) & 0x0fffffff;
 	patchrel:
 	  CK((n & 3) == 0 &&
 	     ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
@@ -367,6 +368,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
+	case DASM_IMMS:
+	  cp[-1] |= ((n>>3) & 4); n &= 0x1f;
+	  /* fallthrough */
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;

+ 69 - 14
love/src/jni/LuaJIT-2.1/dynasm/dasm_mips.lua

@@ -1,17 +1,19 @@
 ------------------------------------------------------------------------------
--- DynASM MIPS module.
+-- DynASM MIPS32/MIPS64 module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
+local mips64 = mips64
+
 -- Module information:
 local _info = {
-  arch =	"mips",
-  description =	"DynASM MIPS module",
+  arch =	mips64 and "mips64" or "mips",
+  description =	"DynASM MIPS32/MIPS64 module",
   version =	"1.4.0",
   vernum =	 10400,
-  release =	"2015-10-18",
+  release =	"2016-05-24",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local match, gmatch = _s.match, _s.gmatch
 local concat, sort = table.concat, table.sort
 local bit = bit or require("bit")
-local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local tohex = bit.tohex
 
 -- Inherited tables and callbacks.
 local g_opt, g_arch
@@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMS",
 }
 
 -- Maximum number of section buffer positions for dasm_put().
@@ -251,6 +254,10 @@ local map_op = {
   bnel_3 =	"54000000STB",
   blezl_2 =	"58000000SB",
   bgtzl_2 =	"5c000000SB",
+  daddi_3 =	mips64 and "60000000TSI",
+  daddiu_3 =	mips64 and "64000000TSI",
+  ldl_2 =	mips64 and "68000000TO",
+  ldr_2 =	mips64 and "6c000000TO",
   lb_2 =	"80000000TO",
   lh_2 =	"84000000TO",
   lwl_2 =	"88000000TO",
@@ -258,23 +265,30 @@ local map_op = {
   lbu_2 =	"90000000TO",
   lhu_2 =	"94000000TO",
   lwr_2 =	"98000000TO",
+  lwu_2 =	mips64 and "9c000000TO",
   sb_2 =	"a0000000TO",
   sh_2 =	"a4000000TO",
   swl_2 =	"a8000000TO",
   sw_2 =	"ac000000TO",
+  sdl_2 =	mips64 and "b0000000TO",
+  sdr_2 =	mips64 and "b4000000TO", -- Primary opcode 0x2d (0x2d << 26).
   swr_2 =	"b8000000TO",
   cache_2 =	"bc000000NO",
   ll_2 =	"c0000000TO",
   lwc1_2 =	"c4000000HO",
   pref_2 =	"cc000000NO",
   ldc1_2 =	"d4000000HO",
+  ld_2 =	mips64 and "dc000000TO",
   sc_2 =	"e0000000TO",
   swc1_2 =	"e4000000HO",
+  scd_2 =	mips64 and "f0000000TO",
   sdc1_2 =	"f4000000HO",
+  sd_2 =	mips64 and "fc000000TO",
 
   -- Opcode SPECIAL.
   nop_0 =	"00000000",
   sll_3 =	"00000000DTA",
+  sextw_2 =	"00000000DT",
   movf_2 =	"00000001DS",
   movf_3 =	"00000001DSC",
   movt_2 =	"00010001DS",
@@ -285,6 +299,7 @@ local map_op = {
   sllv_3 =	"00000004DTS",
   srlv_3 =	"00000006DTS",
   rotrv_3 =	"00000046DTS",
+  drotrv_3 =	mips64 and "00000056DTS",
   srav_3 =	"00000007DTS",
   jr_1 =	"00000008S",
   jalr_1 =	"0000f809S",
@@ -300,15 +315,22 @@ local map_op = {
   mthi_1 =	"00000011S",
   mflo_1 =	"00000012D",
   mtlo_1 =	"00000013S",
+  dsllv_3 =	mips64 and "00000014DTS",
+  dsrlv_3 =	mips64 and "00000016DTS",
+  dsrav_3 =	mips64 and "00000017DTS",
   mult_2 =	"00000018ST",
   multu_2 =	"00000019ST",
   div_2 =	"0000001aST",
   divu_2 =	"0000001bST",
+  dmult_2 =	mips64 and "0000001cST",
+  dmultu_2 =	mips64 and "0000001dST",
+  ddiv_2 =	mips64 and "0000001eST",
+  ddivu_2 =	mips64 and "0000001fST",
   add_3 =	"00000020DST",
-  move_2 =	"00000021DS",
+  move_2 =	mips64 and "00000025DS" or "00000021DS",
   addu_3 =	"00000021DST",
   sub_3 =	"00000022DST",
-  negu_2 =	"00000023DT",
+  negu_2 =	mips64 and "0000002fDT" or "00000023DT",
   subu_3 =	"00000023DST",
   and_3 =	"00000024DST",
   or_3 =	"00000025DST",
@@ -317,6 +339,10 @@ local map_op = {
   nor_3 =	"00000027DST",
   slt_3 =	"0000002aDST",
   sltu_3 =	"0000002bDST",
+  dadd_3 =	mips64 and "0000002cDST",
+  daddu_3 =	mips64 and "0000002dDST",
+  dsub_3 =	mips64 and "0000002eDST",
+  dsubu_3 =	mips64 and "0000002fDST",
   tge_2 =	"00000030ST",
   tge_3 =	"00000030STZ",
   tgeu_2 =	"00000031ST",
@@ -329,6 +355,14 @@ local map_op = {
   teq_3 =	"00000034STZ",
   tne_2 =	"00000036ST",
   tne_3 =	"00000036STZ",
+  dsll_3 =	mips64 and "00000038DTa",
+  dsrl_3 =	mips64 and "0000003aDTa",
+  drotr_3 =	mips64 and "0020003aDTa",
+  dsra_3 =	mips64 and "0000003bDTa",
+  dsll32_3 =	mips64 and "0000003cDTA",
+  dsrl32_3 =	mips64 and "0000003eDTA",
+  drotr32_3 =	mips64 and "0020003eDTA",
+  dsra32_3 =	mips64 and "0000003fDTA",
 
   -- Opcode REGIMM.
   bltz_2 =	"04000000SB",
@@ -356,13 +390,24 @@ local map_op = {
   msubu_2 =	"70000005ST",
   clz_2 =	"70000020DS=",
   clo_2 =	"70000021DS=",
+  dclz_2 =	mips64 and "70000024DS=",
+  dclo_2 =	mips64 and "70000025DS=",
   sdbbp_0 =	"7000003f",
   sdbbp_1 =	"7000003fY",
 
   -- Opcode SPECIAL3.
   ext_4 =	"7c000000TSAM", -- Note: last arg is msbd = size-1
+  dextm_4 =	mips64 and "7c000001TSAM", -- Args: pos    | size-1-32
+  dextu_4 =	mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
+  dext_4 =	mips64 and "7c000003TSAM", -- Args: pos    | size-1
+  zextw_2 =	mips64 and "7c00f803TS",
   ins_4 =	"7c000004TSAM", -- Note: last arg is msb = pos+size-1
+  dinsm_4 =	mips64 and "7c000005TSAM", -- Args: pos    | pos+size-33
+  dinsu_4 =	mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
+  dins_4 =	mips64 and "7c000007TSAM", -- Args: pos    | pos+size-1
   wsbh_2 =	"7c0000a0DT",
+  dsbh_2 =	mips64 and "7c0000a4DT",
+  dshd_2 =	mips64 and "7c000164DT",
   seb_2 =	"7c000420DT",
   seh_2 =	"7c000620DT",
   rdhwr_2 =	"7c00003bTD",
@@ -370,8 +415,12 @@ local map_op = {
   -- Opcode COP0.
   mfc0_2 =	"40000000TD",
   mfc0_3 =	"40000000TDW",
+  dmfc0_2 =	mips64 and "40200000TD",
+  dmfc0_3 =	mips64 and "40200000TDW",
   mtc0_2 =	"40800000TD",
   mtc0_3 =	"40800000TDW",
+  dmtc0_2 =	mips64 and "40a00000TD",
+  dmtc0_3 =	mips64 and "40a00000TDW",
   rdpgpr_2 =	"41400000DT",
   di_0 =	"41606000",
   di_1 =	"41606000T",
@@ -388,9 +437,11 @@ local map_op = {
 
   -- Opcode COP1.
   mfc1_2 =	"44000000TG",
+  dmfc1_2 =	mips64 and "44200000TG",
   cfc1_2 =	"44400000TG",
   mfhc1_2 =	"44600000TG",
   mtc1_2 =	"44800000TG",
+  dmtc1_2 =	mips64 and "44a00000TG",
   ctc1_2 =	"44c00000TG",
   mthc1_2 =	"44e00000TG",
 
@@ -633,7 +684,7 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
 end
 
-local function parse_imm(imm, bits, shift, scale, signed)
+local function parse_imm(imm, bits, shift, scale, signed, action)
   local n = tonumber(imm)
   if n then
     local m = sar(n, scale)
@@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
 	 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
   else
-    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    waction(action or "IMM",
+	    (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
     return 0
   end
 end
@@ -757,12 +809,15 @@ map_op[".template__"] = function(params, template, nparams)
     elseif p == "X" then
       op = op + parse_index(params[n]); n = n + 1
     elseif p == "B" or p == "J" then
-      local mode, n, s = parse_label(params[n], false)
-      if p == "B" then n = n + 2048 end
-      waction("REL_"..mode, n, s, 1)
+      local mode, m, s = parse_label(params[n], false)
+      if p == "B" then m = m + 2048 end
+      waction("REL_"..mode, m, s, 1)
       n = n + 1
     elseif p == "A" then
       op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
+    elseif p == "a" then
+      local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
+      op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
     elseif p == "M" then
       op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
     elseif p == "N" then

+ 12 - 0
love/src/jni/LuaJIT-2.1/dynasm/dasm_mips64.lua

@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM MIPS64 module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+mips64 = true -- Global on purpose: dasm_mips reads it (ugly, but effective).
+return require("dasm_mips")

+ 2 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_ppc.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM PPC/PPC64 encoding engine.
-** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -354,6 +354,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
 	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);

+ 4 - 4
love/src/jni/LuaJIT-2.1/dynasm/dasm_ppc.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM PPC/PPC64 module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 --
 -- Support for various extensions contributed by Caio Souza Oliveira.
@@ -1722,9 +1722,9 @@ op_template = function(params, template, nparams)
     elseif p == "M" then
       op = op + parse_shiftmask(params[n], false); n = n + 1
     elseif p == "J" or p == "K" then
-      local mode, n, s = parse_label(params[n], false)
-      if p == "K" then n = n + 2048 end
-      waction("REL_"..mode, n, s, 1)
+      local mode, m, s = parse_label(params[n], false)
+      if p == "K" then m = m + 2048 end
+      waction("REL_"..mode, m, s, 1)
       n = n + 1
     elseif p == "0" then
       if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end

+ 1 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_proto.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM encoding engine prototypes.
-** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 

+ 1 - 1
love/src/jni/LuaJIT-2.1/dynasm/dasm_x64.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM x64 module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 -- This module just sets 64 bit mode for the combined x86/x64 module.

+ 43 - 10
love/src/jni/LuaJIT-2.1/dynasm/dasm_x86.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM x86 encoding engine.
-** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
   dasm_State *D = Dst_REF;
   dasm_ActList p = D->actionlist + start;
   dasm_Section *sec = D->section;
-  int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+  int pos = sec->pos, ofs = sec->ofs, mrm = -1;
   int *b;
 
   if (pos >= sec->epos) {
@@ -193,20 +193,28 @@ void dasm_put(Dst_DECL, int start, ...)
       b[pos++] = n;
       switch (action) {
       case DASM_DISP:
-	if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
-      case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
+	if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
+	/* fallthrough */
+      case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
       case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
       case DASM_IMM_D: ofs += 4; break;
       case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
       case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
-      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
+      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
       case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
       case DASM_SPACE: p++; ofs += n; break;
       case DASM_SETLABEL: b[pos-2] = -0x40000000; break;  /* Neg. label ofs. */
-      case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
-	if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+      case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
+	if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+	if (*p < 0x20 && (n&7) == 4) ofs++;
+	switch ((*p++ >> 3) & 3) {
+	case 3: n |= b[pos-3]; /* fallthrough */
+	case 2: n |= b[pos-2]; /* fallthrough */
+	case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+	}
+	continue;
       }
-      mrm = 4;
+      mrm = -1;
     } else {
       int *pl, n;
       switch (action) {
@@ -322,11 +330,14 @@ int dasm_link(Dst_DECL, size_t *szp)
 	  pos += 2;
 	  break;
 	}
+	  /* fallthrough */
 	case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
+	  /* fallthrough */
 	case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
 	case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
 	case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
 	case DASM_LABEL_LG: p++;
+	  /* fallthrough */
 	case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
 	case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
 	case DASM_EXTERN: p += 2; break;
@@ -384,22 +395,42 @@ int dasm_encode(Dst_DECL, void *buffer)
 	    if (mrm != 5) { mm[-1] -= 0x80; break; } }
 	  if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
 	}
+	  /* fallthrough */
 	case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
 	case DASM_IMM_DB: if (((n+128)&-256) == 0) {
 	    db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
 	  } else mark = NULL;
+	  /* fallthrough */
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
+	  /* fallthrough */
 	case DASM_IMM_W: dasmw(n); break;
 	case DASM_VREG: {
 	  int t = *p++;
-	  if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3;
+	  unsigned char *ex = cp - (t&7);
+	  if ((n & 8) && t < 0xa0) {
+	    if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
+	    n &= 7;
+	  } else if (n & 0x10) {
+	    if (*ex & 0x80) {
+	      *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
+	    }
+	    while (++ex < cp) ex[-1] = *ex;
+	    if (mark) mark--;
+	    cp--;
+	    n &= 7;
+	  }
+	  if (t >= 0xc0) n <<= 4;
+	  else if (t >= 0x40) n <<= 3;
+	  else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
 	  cp[-1] ^= n;
 	  break;
 	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
-	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+	  /* fallthrough */
+	case DASM_REL_A: rel_a:
+	  n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
 	case DASM_REL_PC: rel_pc: {
 	  int shrink = *b++;
 	  int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
@@ -410,6 +441,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	}
 	case DASM_IMM_LG:
 	  p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+	  /* fallthrough */
 	case DASM_IMM_PC: {
 	  int *pb = DASM_POS2PTR(D, n);
 	  n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
@@ -430,6 +462,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
 	case DASM_MARK: mark = cp; break;
 	case DASM_ESC: action = *p++;
+	  /* fallthrough */
 	default: *cp++ = action; break;
 	case DASM_SECTION: case DASM_STOP: goto stop;
 	}

+ 176 - 33
love/src/jni/LuaJIT-2.1/dynasm/dasm_x86.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM x86/x64 module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -41,7 +41,7 @@ local action_names = {
   -- int arg, 1 buffer pos:
   "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
   -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
-  "VREG", "SPACE", -- !x64: VREG support NYI.
+  "VREG", "SPACE",
   -- ptrdiff_t arg, 1 buffer pos (address): !x64
   "SETLABEL", "REL_A",
   -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
 -- Current number of section buffer positions for dasm_put().
 local secpos = 1
 
+-- VREG kind encodings, pre-shifted by 5 bits.
+-- Each key names the instruction field a variable register is patched into.
+-- wvreg() adds the pending shrink count (times 8) and its psz argument on top
+-- of these values, and compares them against a threshold, so order matters.
+local map_vreg = {
+  ["modrm.rm.m"] = 0x00,
+  ["modrm.rm.r"] = 0x20,
+  ["opcode"] =     0x20,
+  ["sib.base"] =   0x20,
+  ["sib.index"] =  0x40,
+  ["modrm.reg"] =  0x80,
+  ["vex.v"] =      0xa0,
+  ["imm.hi"] =     0xc0,
+}
+
+-- Current number of VREG actions contributing to REX/VEX shrinkage.
+-- Incremented by wvreg() and reset to 0 whenever a non-deferred call flushes it.
+local vreg_shrink_count = 0
+
 ------------------------------------------------------------------------------
 
 -- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
   if a or num then secpos = secpos + (num or 1) end
 end
 
+-- Optionally add a VREG action.
+--   kind   key into map_vreg selecting the field the register is patched into
+--   vreg   operand handed to the VREG action; nil/false makes this a no-op
+--   psz    value added to the emitted kind byte (defaults to 0)
+--   sk     kinds encoded below this threshold count towards shrinkage
+--   defer  if true, keep the pending shrink count for a later wvreg() call
+local function wvreg(kind, vreg, psz, sk, defer)
+  if not vreg then return end
+  waction("VREG", vreg)
+  -- Report the offending kind (not the register operand) on a bad lookup.
+  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
+  if b < (sk or 0) then
+    vreg_shrink_count = vreg_shrink_count + 1
+  end
+  if not defer then
+    -- Flush the accumulated shrink count into the kind byte (count * 8).
+    b = b + vreg_shrink_count * 8
+    vreg_shrink_count = 0
+  end
+  wputxb(b + (psz or 0))
+end
+
 -- Add call to embedded DynASM C code.
 local function wcall(func, args)
   wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
 map_reg_valid_index[map_archdef.esp] = false
 if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+if x64 then map_reg_needrex[map_archdef.Rb] = true end
 map_archdef["Ra"] = "@"..addrsize
 
 -- FP registers (internally tword sized, but use "f" as operand size).
@@ -463,16 +494,24 @@ local function wputszarg(sz, n)
 end
 
 -- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex, vex)
+local function wputop(sz, op, rex, vex, vregr, vregxb)
+  local psz, sk = 0, nil
   if vex then
     local tail
     if vex.m == 1 and band(rex, 11) == 0 then
-      wputb(0xc5)
+      if x64 and vregxb then
+	sk = map_vreg["modrm.reg"]
+      else
+	wputb(0xc5)
       tail = shl(bxor(band(rex, 4), 4), 5)
-    else
+      psz = 3
+      end
+    end
+    if not tail then
       wputb(0xc4)
       wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
       tail = shl(band(rex, 8), 4)
+      psz = 4
     end
     local reg, vreg = 0, nil
     if vex.v then
@@ -482,12 +521,18 @@ local function wputop(sz, op, rex, vex)
     end
     if sz == "y" or vex.l then tail = tail + 4 end
     wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
-    if vreg then waction("VREG", vreg); wputxb(4) end
+    wvreg("vex.v", vreg)
     rex = 0
     if op >= 256 then werror("bad vex opcode") end
+  else
+    if rex ~= 0 then
+      if not x64 then werror("bad operand size") end
+    elseif (vregr or vregxb) and x64 then
+      rex = 0x10
+      sk = map_vreg["vex.v"]
+    end
   end
   local r
-  if rex ~= 0 and not x64 then werror("bad operand size") end
   if sz == "w" then wputb(102) end
   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -496,20 +541,20 @@ local function wputop(sz, op, rex, vex)
     if rex ~= 0 then
       local opc3 = band(op, 0xffff00)
       if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
-	wputb(64 + band(rex, 15)); rex = 0
+	wputb(64 + band(rex, 15)); rex = 0; psz = 2
       end
     end
-    wputb(shr(op, 16)); op = band(op, 0xffff)
+    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
   end
   if op >= 256 then
     local b = shr(op, 8)
-    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
-    wputb(b)
-    op = band(op, 255)
+    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
+    wputb(b); op = band(op, 255); psz = psz + 1
   end
-  if rex ~= 0 then wputb(64 + band(rex, 15)) end
+  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
   if sz == "b" then op = op - 1 end
   wputb(op)
+  return psz, sk
 end
 
 -- Put ModRM or SIB formatted byte.
@@ -519,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
 end
 
 -- Put ModRM/SIB plus optional displacement.
-local function wputmrmsib(t, imark, s, vsreg)
+local function wputmrmsib(t, imark, s, vsreg, psz, sk)
   local vreg, vxreg
   local reg, xreg = t.reg, t.xreg
   if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -529,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
   -- Register mode.
   if sub(t.mode, 1, 1) == "r" then
     wputmodrm(3, s, reg)
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
-    if vreg then waction("VREG", vreg); wputxb(0) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+    wvreg("modrm.rm.r", vreg, psz+1, sk)
     return
   end
 
@@ -544,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
       -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
       wputmodrm(0, s, 4)
       if imark == "I" then waction("MARK") end
-      if vsreg then waction("VREG", vsreg); wputxb(2) end
+      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
       wputmodrm(t.xsc, xreg, 5)
-      if vxreg then waction("VREG", vxreg); wputxb(3) end
+      wvreg("sib.index", vxreg, psz+2, sk)
     else
       -- Pure 32 bit displacement.
       if x64 and tdisp ~= "table" then
 	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+	wvreg("modrm.reg", vsreg, psz+1, sk)
 	if imark == "I" then waction("MARK") end
 	wputmodrm(0, 4, 5)
       else
 	riprel = x64
 	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+	wvreg("modrm.reg", vsreg, psz+1, sk)
 	if imark == "I" then waction("MARK") end
       end
-      if vsreg then waction("VREG", vsreg); wputxb(2) end
     end
     if riprel then -- Emit rip-relative displacement.
       if match("UWSiI", imark) then
@@ -586,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
   if xreg or band(reg, 7) == 4 then
     wputmodrm(m or 2, s, 4) -- ModRM.
     if m == nil or imark == "I" then waction("MARK") end
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
     wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
-    if vxreg then waction("VREG", vxreg); wputxb(3) end
-    if vreg then waction("VREG", vreg); wputxb(1) end
+    wvreg("sib.index", vxreg, psz+2, sk, vreg)
+    wvreg("sib.base", vreg, psz+2, sk)
   else
     wputmodrm(m or 2, s, reg) -- ModRM.
     if (imark == "I" and (m == 1 or m == 2)) or
        (m == nil and (vsreg or vreg)) then waction("MARK") end
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
-    if vreg then waction("VREG", vreg); wputxb(1) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+    wvreg("modrm.rm.m", vreg, psz+1, sk)
   end
 
   -- Put displacement.
@@ -909,6 +955,7 @@ end
 --   "u"       Use VEX encoding, vvvv unused.
 --   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
 --             removed from the list used by future characters).
+--   "w"       Use VEX encoding, vvvv from 3rd operand.
 --   "L"       Force VEX.L
 --
 -- All of the following characters force a flush of the opcode:
@@ -1490,8 +1537,8 @@ local map_op = {
   vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
   vrsqrtps_2 =	"rmoy:0Fu52rM",
   vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
-  vroundpd_3 =	"rmioy:660F3AV09rMU",
-  vroundps_3 =	"rmioy:660F3AV08rMU",
+  vroundpd_3 =	"rmioy:660F3Au09rMU",
+  vroundps_3 =	"rmioy:660F3Au08rMU",
   vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
   vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
   vshufpd_4 =	"rrmioy:660FVC6rMU",
@@ -1521,6 +1568,12 @@ local map_op = {
 
   -- AVX, AVX2 integer ops
   -- In general, xmm requires AVX, ymm requires AVX2.
+  vaesdec_3 =  "rrmo:660F38VDErM",
+  vaesdeclast_3 = "rrmo:660F38VDFrM",
+  vaesenc_3 =  "rrmo:660F38VDCrM",
+  vaesenclast_3 = "rrmo:660F38VDDrM",
+  vaesimc_2 =  "rmo:660F38uDBrM",
+  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
   vlddqu_2 =	"rxoy:F20FuF0rM",
   vmaskmovdqu_2 = "rro:660FuF7rM",
   vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
@@ -1621,6 +1674,95 @@ local map_op = {
   vpsravd_3 =	"rrmoy:660F38V46rM",
   vpsrlvd_3 =	"rrmoy:660F38V45rM",
   vpsrlvq_3 =	"rrmoy:660F38VX45rM",
+
+  -- Intel ADX
+  adcx_2 =	"rmqd:660F38F6rM",
+  adox_2 =	"rmqd:F30F38F6rM",
+
+  -- BMI1
+  andn_3 =	"rrmqd:0F38VF2rM",
+  bextr_3 =	"rmrqd:0F38wF7rM",
+  blsi_2 =	"rmqd:0F38vF33m",
+  blsmsk_2 =	"rmqd:0F38vF32m",
+  blsr_2 =	"rmqd:0F38vF31m",
+  tzcnt_2 =	"rmqdw:F30FBCrM",
+
+  -- BMI2
+  bzhi_3 =	"rmrqd:0F38wF5rM",
+  mulx_3 =	"rrmqd:F20F38VF6rM",
+  pdep_3 =	"rrmqd:F20F38VF5rM",
+  pext_3 =	"rrmqd:F30F38VF5rM",
+  rorx_3 =	"rmSqd:F20F3AuF0rMS",
+  sarx_3 =	"rmrqd:F30F38wF7rM",
+  shrx_3 =	"rmrqd:F20F38wF7rM",
+  shlx_3 =	"rmrqd:660F38wF7rM",
+
+  -- FMA3
+  vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
+  vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
+  vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
+  vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
+  vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
+  vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
+
+  vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
+  vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
+  vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
+  vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
+  vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
+  vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
+
+  vfmadd132pd_3 = "rrmoy:660F38VX98rM",
+  vfmadd132ps_3 = "rrmoy:660F38V98rM",
+  vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
+  vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
+  vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
+  vfmadd213ps_3 = "rrmoy:660F38VA8rM",
+  vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
+  vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
+  vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
+  vfmadd231ps_3 = "rrmoy:660F38VB8rM",
+  vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
+  vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
+
+  vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
+  vfmsub132ps_3 = "rrmoy:660F38V9ArM",
+  vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
+  vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
+  vfmsub213pd_3 = "rrmoy:660F38VXAArM",
+  vfmsub213ps_3 = "rrmoy:660F38VAArM",
+  vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
+  vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
+  vfmsub231pd_3 = "rrmoy:660F38VXBArM",
+  vfmsub231ps_3 = "rrmoy:660F38VBArM",
+  vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
+  vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
+
+  vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
+  vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
+  vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
+  vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
+  vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
+  vfnmadd213ps_3 = "rrmoy:660F38VACrM",
+  vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
+  vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
+  vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
+  vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
+  vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
+  vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
+
+  vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
+  vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
+  vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
+  vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
+  vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
+  vfnmsub213ps_3 = "rrmoy:660F38VAErM",
+  vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
+  vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
+  vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
+  vfnmsub231ps_3 = "rrmoy:660F38VBErM",
+  vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
+  vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
 }
 
 ------------------------------------------------------------------------------
@@ -1761,10 +1903,11 @@ local function dopattern(pat, args, sz, op, needrex)
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if s > 7 then rex = rex + 4 end
       if needrex then rex = rex + 16 end
-      wputop(szov, opcode, rex, vex); opcode = nil
+      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
+      opcode = nil
       local imark = sub(pat, -1) -- Force a mark (ugly).
       -- Put ModRM/SIB with regno/last digit as spare.
-      wputmrmsib(t, imark, s, addin and addin.vreg)
+      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
       addin = nil
     elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
       local b = band(opcode, 255); opcode = shr(opcode, 8)
@@ -1791,8 +1934,8 @@ local function dopattern(pat, args, sz, op, needrex)
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if needrex then rex = rex + 16 end
 	if addin and addin.reg == -1 then
-	  wputop(szov, opcode - 7, rex, vex)
-	  waction("VREG", addin.vreg); wputxb(0)
+	  local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
+	  wvreg("opcode", addin.vreg, psz, sk)
 	else
 	  if addin and addin.reg > 7 then rex = rex + 1 end
 	  wputop(szov, opcode, rex, vex)
@@ -1836,7 +1979,7 @@ local function dopattern(pat, args, sz, op, needrex)
 	  local reg = a.reg
 	  if reg < 0 then
 	    wputb(0)
-	    waction("VREG", a.vreg); wputxb(5)
+	    wvreg("imm.hi", a.vreg)
 	  else
 	    wputb(shl(reg, 4))
 	  end
@@ -1988,8 +2131,8 @@ if x64 then
 	rex = a.reg > 7 and 9 or 8
       end
     end
-    wputop(sz, opcode, rex)
-    if vreg then waction("VREG", vreg); wputxb(0) end
+    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
+    wvreg("opcode", vreg, psz, sk)
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
   end

+ 2 - 2
love/src/jni/LuaJIT-2.1/dynasm/dynasm.lua

@@ -2,7 +2,7 @@
 -- DynASM. A dynamic assembler for code generation engines.
 -- Originally designed and implemented for LuaJIT.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- See below for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -17,7 +17,7 @@ local _info = {
   url =		"http://luajit.org/dynasm.html",
   license =	"MIT",
   copyright =	[[
-Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the

+ 1 - 1
love/src/jni/LuaJIT-2.1/etc/luajit.1

@@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
 Runs some nested loops and shows the resulting traces.
 .SH COPYRIGHT
 .PP
-\fBLuaJIT\fR is Copyright \(co 2005-2015 Mike Pall.
+\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall.
 .br
 \fBLuaJIT\fR is open source software, released under the MIT license.
 .SH SEE ALSO

+ 1 - 1
love/src/jni/LuaJIT-2.1/etc/luajit.pc

@@ -2,7 +2,7 @@
 majver=2
 minver=1
 relver=0
-version=${majver}.${minver}.${relver}-beta1
+version=${majver}.${minver}.${relver}-beta3
 abiver=5.1
 
 prefix=/usr/local

+ 13 - 0
love/src/jni/LuaJIT-2.1/lj_ircall.h.patch

@@ -0,0 +1,13 @@
+diff --git a/src/lj_ircall.h b/src/lj_ircall.h
+index 9b3883b..d19edcb 100644
+--- a/src/lj_ircall.h
++++ b/src/lj_ircall.h
+@@ -330,7 +330,7 @@ extern double lj_vm_sfmax(double a, double b);
+ #endif
+ 
+ #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
+-#ifdef __GNUC__
++#if defined(__GNUC__) || defined(__clang__)
+ #define fp64_l2d __floatdidf
+ #define fp64_ul2d __floatundidf
+ #define fp64_l2f __floatdisf

+ 31 - 16
love/src/jni/LuaJIT-2.1/src/Makefile

@@ -7,7 +7,7 @@
 # Also works with MinGW and Cygwin on Windows.
 # Please check msvcbuild.bat for building with MSVC on Windows.
 #
-# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 
 MAJVER=  2
@@ -110,6 +110,9 @@ XCFLAGS=
 #XCFLAGS+= -DLUAJIT_NUMMODE=1
 #XCFLAGS+= -DLUAJIT_NUMMODE=2
 #
+# Disable LJ_GC64 mode for x64.
+#XCFLAGS+= -DLUAJIT_DISABLE_GC64
+#
 ##############################################################################
 
 ##############################################################################
@@ -121,8 +124,8 @@ XCFLAGS=
 #
 # Use the system provided memory allocator (realloc) instead of the
 # bundled memory allocator. This is slower, but sometimes helpful for
-# debugging. This option cannot be enabled on x64, since realloc usually
-# doesn't return addresses in the right address range.
+# debugging. This option cannot be enabled on x64 without GC64, since
+# realloc usually doesn't return addresses in the right address range.
 # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
 # the only way to get useful results from it for all other architectures.
 #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
@@ -155,21 +158,20 @@ XCFLAGS=
 
 ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
   HOST_SYS= Windows
-  HOST_RM= del
 else
   HOST_SYS:= $(shell uname -s)
   ifneq (,$(findstring MINGW,$(HOST_SYS)))
     HOST_SYS= Windows
     HOST_MSYS= mingw
   endif
+  ifneq (,$(findstring MSYS,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= mingw
+  endif
   ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
     HOST_SYS= Windows
     HOST_MSYS= cygwin
   endif
-  # Use Clang for OSX host.
-  ifeq (Darwin,$(HOST_SYS))
-    DEFAULT_CC= clang
-  endif
 endif
 
 ##############################################################################
@@ -192,7 +194,7 @@ CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 
 HOST_CC= $(CC)
-HOST_RM= rm -f
+HOST_RM?= rm -f
 # If left blank, minilua is built and used. You can supply an installed
 # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
 HOST_LUA=
@@ -218,6 +220,7 @@ TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
 TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
 TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
 TARGET_DLLNAME= lua$(NODOTABIVER).dll
+TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a
 TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
 TARGET_DYNXLDOPTS=
 
@@ -243,6 +246,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
   TARGET_LJARCH= arm
 else
 ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -D__AARCH64EB__=1
+  endif
   TARGET_LJARCH= arm64
 else
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
@@ -257,7 +263,11 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
     TARGET_ARCH= -D__MIPSEL__=1
   endif
-  TARGET_LJARCH= mips
+  ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
+    TARGET_LJARCH= mips64
+  else
+    TARGET_LJARCH= mips
+  endif
 else
   $(error Unsupported target architecture)
 endif
@@ -299,18 +309,19 @@ endif
 TARGET_SYS?= $(HOST_SYS)
 ifeq (Windows,$(TARGET_SYS))
   TARGET_STRIP+= --strip-unneeded
-  TARGET_XSHLDFLAGS= -shared
+  TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME)
   TARGET_DYNXLDOPTS=
+  HOST_RM= del
 else
+  TARGET_AR+= 2>/dev/null
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
-    export MACOSX_DEPLOYMENT_TARGET=10.4
+    $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
   endif
   TARGET_STRIP+= -x
-  TARGET_AR+= 2>/dev/null
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -321,7 +332,6 @@ ifeq (Darwin,$(TARGET_SYS))
 else
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
-  TARGET_AR+= 2>/dev/null
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -388,6 +398,11 @@ DASM_XFLAGS=
 DASM_AFLAGS=
 DASM_ARCH= $(TARGET_LJARCH)
 
+ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D ENDIAN_LE
+else
+  DASM_AFLAGS+= -D ENDIAN_BE
+endif
 ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
   DASM_AFLAGS+= -D P64
 endif
@@ -473,7 +488,7 @@ LJLIB_C= $(LJLIB_O:.o=.c)
 LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_strfmt.o lj_api.o lj_profile.o \
+	  lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
 	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
@@ -622,7 +637,7 @@ $(MINILUA_T): $(MINILUA_O)
 	$(E) "HOSTLINK  $@"
 	$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
 
-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP)
+host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua
 	$(E) "DYNASM    $@"
 	$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
 

+ 19 - 17
love/src/jni/LuaJIT-2.1/src/Makefile.dep

@@ -3,8 +3,8 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
- lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
+ lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
+ lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
  lj_strfmt.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
@@ -94,7 +94,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_crecord.h lj_strfmt.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
- lj_ccallback.h
+ lj_ccallback.h lj_buf.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
  lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
@@ -163,7 +163,7 @@ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
- lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
+ lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
  lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
@@ -188,6 +188,8 @@ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_str.h lj_char.h
 lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
+lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -213,19 +215,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
  lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
  lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
- lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
- lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
- lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
- lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
- lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
- lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
- lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
- lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
- lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
- lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
- lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
- lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_init.c
+ lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
+ lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
+ lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
+ lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
+ lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
+ lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
+ lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
+ lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
+ lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
+ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+ lib_ffi.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

+ 2 - 2
love/src/jni/LuaJIT-2.1/src/host/buildvm.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** This is a tool to build the hand-tuned assembler code required for
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
@@ -110,7 +110,7 @@ static const char *sym_decorate(BuildCtx *ctx,
   if (p) {
 #if LJ_TARGET_X86ORX64
     if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
-      name[0] = '@';
+      name[0] = name[1] == 'R' ? '_' : '@';  /* Just for _RtlUnwind@16. */
     else
       *p = '\0';
 #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/host/buildvm.h

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _BUILDVM_H

+ 19 - 5
love/src/jni/LuaJIT-2.1/src/host/buildvm_asm.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: Assembler source code emitter.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
@@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
 {
   int i;
   for (i = 0; i < n; i += 4) {
+    uint32_t ins = *(uint32_t *)(p+i);
+#if LJ_TARGET_ARM64 && LJ_BE
+    ins = lj_bswap(ins);  /* ARM64 instructions are always little-endian. */
+#endif
     if ((i & 15) == 0)
-      fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i));
+      fprintf(ctx->fp, "\t.long 0x%08x", ins);
     else
-      fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i));
+      fprintf(ctx->fp, ",0x%08x", ins);
     if ((i & 15) == 12) putc('\n', ctx->fp);
   }
   if ((n & 15) != 0) putc('\n', ctx->fp);
@@ -214,7 +218,8 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc
   case BUILD_machasm:
     fprintf(ctx->fp,
       "\n\t.private_extern %s\n"
-      "%s:\n", name, name);
+      "\t.no_dead_strip %s\n"
+      "%s:\n", name, name, name);
     break;
   default:
     break;
@@ -261,11 +266,20 @@ void emit_asm(BuildCtx *ctx)
 
 #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
   /* This should really be moved into buildvm_arm.dasc. */
+#if LJ_ARCH_HASFPU
+  fprintf(ctx->fp,
+	  ".fnstart\n"
+	  ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
+	  ".vsave {d8-d15}\n"
+	  ".save {r4}\n"
+	  ".pad #28\n");
+#else
   fprintf(ctx->fp,
 	  ".fnstart\n"
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".pad #28\n");
 #endif
+#endif
 #if LJ_TARGET_MIPS
   fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
 #endif
@@ -324,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
 #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
 #endif
-#if LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
     /* Hard-float ABI. */
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
 #endif

+ 2 - 2
love/src/jni/LuaJIT-2.1/src/host/buildvm_fold.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: IR folding hash table generator.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
@@ -9,7 +9,7 @@
 
 /* Context for the folding hash table generator. */
 static int lineno;
-static int funcidx;
+static uint32_t funcidx;
 static uint32_t foldkeys[BUILD_MAX_FOLD];
 static uint32_t nkeys;
 

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/host/buildvm_lib.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: library definition compiler.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"

+ 14 - 3
love/src/jni/LuaJIT-2.1/src/host/buildvm_libbc.h

@@ -15,7 +15,12 @@ static const uint8_t libbc_code[] = {
 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
-2,0,76,3,2,0,75,0,1,0,0,2,0
+2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
+3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
+0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
+41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
+18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
+6,252,127,76,4,2,0,0
 #else
 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
@@ -28,7 +33,12 @@ static const uint8_t libbc_code[] = {
 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
-2,0,76,3,2,0,75,0,1,0,0,2,0
+2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
+3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
+0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
+41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
+18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
+6,252,127,76,4,2,0,0
 #endif
 };
 
@@ -40,6 +50,7 @@ static const struct { const char *name; int ofs; } libbc_map[] = {
 {"table_foreach",136},
 {"table_getn",207},
 {"table_remove",226},
-{NULL,355}
+{"table_move",355},
+{NULL,502}
 };
 

+ 27 - 3
love/src/jni/LuaJIT-2.1/src/host/buildvm_peobj.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: PE object emitter.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Only used for building on Windows, since we cannot assume the presence
 ** of a suitable assembler. The host and target byte order must match.
@@ -109,6 +109,8 @@ enum {
 #if LJ_TARGET_X64
   PEOBJ_SECT_PDATA,
   PEOBJ_SECT_XDATA,
+#elif LJ_TARGET_X86
+  PEOBJ_SECT_SXDATA,
 #endif
   PEOBJ_SECT_RDATA_Z,
   PEOBJ_NSECTIONS
@@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx)
   sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
   pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
+#elif LJ_TARGET_X86
+  memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
+  pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
+  pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
+  /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
+  pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
 #endif
 
   memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx)
   nrsym = ctx->nrelocsym;
   pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
 #if LJ_TARGET_X64
-  pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win64. */
+  pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win. */
 #endif
 
   /* Write PE object header and all sections. */
@@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx)
     reloc.type = PEOBJ_RELOC_ADDR32NB;
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
   }
+#elif LJ_TARGET_X86
+  /* Write .sxdata section. */
+  for (i = 0; i < nrsym; i++) {
+    if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
+      uint32_t symidx = 1+2+i;
+      owrite(ctx, &symidx, 4);
+      break;
+    }
+  }
+  if (i == nrsym) {
+    fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
+    exit(1);
+  }
 #endif
 
   /* Write .rdata$Z section. */
@@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx)
 #if LJ_TARGET_X64
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
-    emit_peobj_sym(ctx, "lj_err_unwind_win64", 0,
+    emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
 		   PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+#elif LJ_TARGET_X86
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
 #endif
 
     emit_peobj_sym(ctx, ctx->beginsym, 0,

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/host/genlibbc.lua

@@ -2,7 +2,7 @@
 -- Lua script to dump the bytecode of the library functions written in Lua.
 -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 

+ 6 - 5
love/src/jni/LuaJIT-2.1/src/host/genminilua.lua

@@ -2,7 +2,7 @@
 -- Lua script to generate a customized, minified version of Lua.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 
@@ -157,11 +157,11 @@ local function merge_includes(src)
     if includes[name] then return "" end
     includes[name] = true
     local fp = assert(io.open(LUA_SOURCE..name, "r"))
-    local src = fp:read("*a")
+    local inc = fp:read("*a")
     assert(fp:close())
-    src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
-    src = gsub(src, "#endif%s*$", "")
-    return merge_includes(src)
+    inc = gsub(inc, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
+    inc = gsub(inc, "#endif%s*$", "")
+    return merge_includes(inc)
   end)
 end
 
@@ -300,6 +300,7 @@ local function strip_unused3(src)
   src = gsub(src, "if%([^\n]*hookmask[^\n]*&&\n[^\n]*%b{}\n", "")
   src = gsub(src, "(twoto%b()%()", "%1(size_t)")
   src = gsub(src, "i<sizenode", "i<(int)sizenode")
+  src = gsub(src, "cast%(unsigned int,key%-1%)", "cast(unsigned int,key)-1")
   return gsub(src, "\n\n+", "\n")
 end
 

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/host/minilua.c

@@ -1606,7 +1606,7 @@ luaC_barriert(L,t,key);
 return gval(mp);
 }
 static const TValue*luaH_getnum(Table*t,int key){
-if(cast(unsigned int,key-1)<cast(unsigned int,t->sizearray))
+if(cast(unsigned int,key)-1<cast(unsigned int,t->sizearray))
 return&t->array[key-1];
 else{
 lua_Number nk=cast_num(key);

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/jit/bc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT bytecode listing module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --

+ 9 - 9
love/src/jni/LuaJIT-2.1/src/jit/bcsave.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT module to save/list bytecode.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -63,8 +63,8 @@ local map_type = {
 }
 
 local map_arch = {
-  x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
-  mips = true, mipsel = true,
+  x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
+  ppc = true, mips = true, mipsel = true,
 }
 
 local map_os = {
@@ -125,12 +125,12 @@ extern "C"
 #ifdef _WIN32
 __declspec(dllexport)
 #endif
-const char %s%s[] = {
+const unsigned char %s%s[] = {
 ]], LJBC_PREFIX, ctx.modname))
   else
     fp:write(string.format([[
 #define %s%s_SIZE %d
-static const char %s%s[] = {
+static const unsigned char %s%s[] = {
 ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
   end
   local t, n, m = {}, 0, 0
@@ -200,7 +200,7 @@ typedef struct {
 ]]
   local symname = LJBC_PREFIX..ctx.modname
   local is64, isbe = false, false
-  if ctx.arch == "x64" or ctx.arch == "arm64" then
+  if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
     is64 = true
   elseif ctx.arch == "ppc" or ctx.arch == "mips" then
     isbe = true
@@ -237,9 +237,9 @@ typedef struct {
   hdr.eendian = isbe and 2 or 1
   hdr.eversion = 1
   hdr.type = f16(1)
-  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
-    hdr.flags = 0x50001006
+    hdr.flags = f32(0x50001006)
   end
   hdr.version = f32(1)
   hdr.shofs = fofs(ffi.offsetof(o, "sect"))
@@ -275,7 +275,7 @@ typedef struct {
   o.sect[2].size = fofs(ofs)
   o.sect[3].type = f32(3) -- .strtab
   o.sect[3].ofs = fofs(sofs + ofs)
-  o.sect[3].size = fofs(#symname+1)
+  o.sect[3].size = fofs(#symname+2)
   ffi.copy(o.space+ofs+1, symname)
   ofs = ofs + #symname + 2
   o.sect[4].type = f32(1) -- .rodata

+ 2 - 2
love/src/jni/LuaJIT-2.1/src/jit/dis_arm.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT ARM disassembler module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -12,7 +12,7 @@
 
 local type = type
 local sub, byte, format = string.sub, string.byte, string.format
-local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local match, gmatch = string.match, string.gmatch
 local concat = table.concat
 local bit = require("bit")
 local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex

+ 1216 - 0
love/src/jni/LuaJIT-2.1/src/jit/dis_arm64.lua

@@ -0,0 +1,1216 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM64 disassembler module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+--
+-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+-- Sponsored by Cisco Systems, Inc.
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles most user-mode AArch64 instructions.
+-- NYI: Advanced SIMD and VFP instructions.
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local concat = table.concat
+local bit = require("bit")
+local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+local ror = bit.ror
+
+------------------------------------------------------------------------------
+-- Opcode maps
+------------------------------------------------------------------------------
+
+local map_adr = { -- PC-relative addressing.
+  shift = 31, mask = 1,
+  [0] = "adrDBx", "adrpDBx"
+}
+
+local map_addsubi = { -- Add/subtract immediate.
+  shift = 29, mask = 3,
+  [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
+}
+
+local map_logi = { -- Logical immediate.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 22, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
+    },
+    false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
+  }
+}
+
+local map_movwi = { -- Move wide immediate.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 22, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
+    }, false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
+  },
+}
+
+local map_bitf = { -- Bitfield.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 22, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
+      "bfm|bfi|bfxilDN13w",
+      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
+    }
+  },
+  {
+    shift = 22, mask = 1,
+    {
+      shift = 29, mask = 3,
+      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
+      "bfm|bfi|bfxilDN13x",
+      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
+    }
+  }
+}
+
+local map_datai = { -- Data processing - immediate.
+  shift = 23, mask = 7,
+  [0] = map_adr, map_adr, map_addsubi, false,
+  map_logi, map_movwi, map_bitf,
+  {
+    shift = 15, mask = 0x1c0c1,
+    [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
+    [0x10081] = "extr|rorDNM4x"
+  }
+}
+
+local map_logsr = { -- Logical, shifted register.
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 15, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = {
+	shift = 21, mask = 7,
+	[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
+	"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+      },
+      {
+	shift = 21, mask = 7,
+	[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
+	     "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+      },
+      {
+	shift = 21, mask = 7,
+	[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
+	"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+      },
+      {
+	shift = 21, mask = 7,
+	[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
+	"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+      }
+    },
+    false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = {
+      shift = 21, mask = 7,
+      [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
+      "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
+      "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
+      "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
+      "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+    }
+  }
+}
+
+local map_assh = {
+  shift = 31, mask = 1,
+  [0] = {
+    shift = 15, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = {
+	shift = 22, mask = 3,
+	[0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
+	      "adds|cmnD0NMSg", "adds|cmnD0NMg"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
+	      "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
+      },
+    },
+    false -- unallocated
+  },
+  {
+    shift = 29, mask = 3,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
+	    "adds|cmnD0NMg"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
+	    "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
+    }
+  }
+}
+
+local map_addsubsh = { -- Add/subtract, shifted register.
+  shift = 22, mask = 3,
+  [0] = map_assh, map_assh, map_assh
+}
+
+local map_addsubex = { -- Add/subtract, extended register.
+  shift = 22, mask = 3,
+  [0] = {
+    shift = 29, mask = 3,
+    [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
+  }
+}
+
+local map_addsubc = { -- Add/subtract, with carry.
+  shift = 10, mask = 63,
+  [0] = {
+    shift = 29, mask = 3,
+    [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
+  }
+}
+
+local map_ccomp = {
+  shift = 4, mask = 1,
+  [0] = {
+    shift = 10, mask = 3,
+    [0] = { -- Conditional compare register.
+      shift = 29, mask = 3,
+      "ccmnNMVCg", false, "ccmpNMVCg",
+    },
+    [2] = {  -- Conditional compare immediate.
+      shift = 29, mask = 3,
+      "ccmnN5VCg", false, "ccmpN5VCg",
+    }
+  }
+}
+
+local map_csel = { -- Conditional select.
+  shift = 11, mask = 1,
+  [0] = {
+    shift = 10, mask = 1,
+    [0] = {
+      shift = 29, mask = 3,
+      [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
+    },
+    {
+      shift = 29, mask = 3,
+      [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
+    }
+  }
+}
+
+local map_data1s = { -- Data processing, 1 source.
+  shift = 29, mask = 1,
+  [0] = {
+    shift = 31, mask = 1,
+    [0] = {
+      shift = 10, mask = 0x7ff,
+      [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
+    },
+    {
+      shift = 10, mask = 0x7ff,
+      [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
+    }
+  }
+}
+
+local map_data2s = { -- Data processing, 2 sources.
+  shift = 29, mask = 1,
+  [0] = {
+    shift = 10, mask = 63,
+    false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
+    "lsrDNMg", "asrDNMg", "rorDNMg"
+  }
+}
+
+-- Decode table for three-source data processing, indexed by bits 29-31.
+-- Index 0 requires bits 21-23 to be 0 and then uses bit 15 to choose between
+-- madd and msub. Index 4 uses bit 15 first and then bits 21-23 to select the
+-- plain, signed-long, unsigned-long or high-multiply templates.
+-- Indexes 1..3 are unassigned (false).
+local map_data3s = { -- Data processing, 3 sources.
+  shift = 29, mask = 7,
+  [0] = {
+    shift = 21, mask = 7,
+    [0] = {
+      shift = 15, mask = 1,
+      [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
+    }
+  }, false, false, false,
+  {
+    shift = 15, mask = 1,
+    [0] = {
+      shift = 21, mask = 7,
+      [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
+      false, "umaddl|umullDxNMwA0x", "umulhDNMx"
+    },
+    {
+      shift = 21, mask = 7,
+      [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
+      false, "umsubl|umneglDxNMwA0x"
+    }
+  }
+}
+
+-- Dispatch table for register data processing. Bit 28 splits the group:
+-- when clear, bit 24 selects the logical (shifted register) table or, via
+-- bit 21, the add/subtract shifted/extended tables. When set, bits 21-24
+-- select carry, conditional compare, conditional select, the 1/2-source
+-- tables (chosen by bit 30) or, for indexes 8..15, the 3-source table.
+local map_datar = { -- Data processing, register.
+  shift = 28, mask = 1,
+  [0] = {
+    shift = 24, mask = 1,
+    [0] = map_logsr,
+    {
+      shift = 21, mask = 1,
+      [0] = map_addsubsh, map_addsubex
+    }
+  },
+  {
+    shift = 21, mask = 15,
+    [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
+    {
+      shift = 30, mask = 1,
+      [0] = map_data2s, map_data1s
+    },
+    false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
+    map_data3s, map_data3s, map_data3s
+  }
+}
+
+-- Decode table for literal loads. Bit 26 selects the table with the w/x
+-- register templates (0) or the one with the s/d register templates (1);
+-- bits 30-31 then select the template.
+local map_lrl = { -- Load register, literal.
+  shift = 26, mask = 1,
+  [0] = {
+    shift = 30, mask = 3,
+    [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
+  },
+  {
+    shift = 30, mask = 3,
+    [0] = "ldrDsB", "ldrDdB"
+  }
+}
+
+-- Decode table for pre/post-indexed loads and stores. Indexed by bits 30-31,
+-- then bit 26 (only indexes 2 and 3 have an entry for bit 26 set, holding
+-- the s/d register templates), then bits 22-23 for the mnemonic. The "L"
+-- pattern letter formats the address operand.
+local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
+  shift = 30, mask = 3,
+  [0] = {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
+    }
+  },
+  {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
+    }
+  },
+  {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "strDszL", "ldrDszL"
+    }
+  },
+  {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 22, mask = 3,
+      [0] = "strDxzL", "ldrDxzL"
+    },
+    {
+      shift = 22, mask = 3,
+      [0] = "strDdzL", "ldrDdzL"
+    }
+  }
+}
+
+-- Decode table for loads/stores with an immediate or register offset.
+-- Bit 21 selects the immediate forms (0) or the register offset forms (1).
+-- For the immediate forms bits 10-11 select the unscaled table (0) or the
+-- pre/post-indexed table map_lsriind (1 and 3). The register offset forms
+-- only have an entry for bits 10-11 == 2. Below that, bit 26, bits 30-31
+-- and bits 22-23 select the template.
+local map_lsriro = {
+  shift = 21, mask = 1,
+  [0] = {  -- Load/store register immediate.
+    shift = 10, mask = 3,
+    [0] = { -- Unscaled immediate.
+      shift = 26, mask = 1,
+      [0] = {
+	shift = 30, mask = 3,
+	[0] = {
+	  shift = 22, mask = 3,
+	  [0] = "sturbDwK", "ldurbDwK"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "sturhDwK", "ldurhDwK"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "sturDwK", "ldurDwK"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "sturDxK", "ldurDxK"
+	}
+      }
+    }, map_lsriind, false, map_lsriind
+  },
+  {  -- Load/store register, register offset.
+    shift = 10, mask = 3,
+    [2] = {
+      shift = 26, mask = 1,
+      [0] = {
+	shift = 30, mask = 3,
+	[0] = {
+	  shift = 22, mask = 3,
+	  [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "strDwO", "ldrDwO", "ldrswDxO"
+	},
+	{
+	  shift = 22, mask = 3,
+	  [0] = "strDxO", "ldrDxO"
+	}
+      },
+      {
+	shift = 30, mask = 3,
+	[2] = {
+	  shift = 22, mask = 3,
+	  [0] = "strDsO", "ldrDsO"
+	},
+	[3] = {
+	  shift = 22, mask = 3,
+	  [0] = "strDdO", "ldrDdO"
+	}
+      }
+    }
+  }
+}
+
+-- Decode table for register pair loads/stores. Bit 22 selects stores (0) or
+-- loads (1), bits 30-31 select the row and bit 26 the entry within it.
+-- Note the store row for bits 30-31 == 1 has only a positional entry, i.e.
+-- "stpDzAzdP" sits at index 1 (bit 26 set) and index 0 is empty.
+local map_lsp = { -- Load/store register pair, offset.
+  shift = 22, mask = 1,
+  [0] = {
+    shift = 30, mask = 3,
+    [0] = {
+      shift = 26, mask = 1,
+      [0] = "stpDzAzwP", "stpDzAzsP",
+    },
+    {
+      shift = 26, mask = 1,
+      "stpDzAzdP"
+    },
+    {
+      shift = 26, mask = 1,
+      [0] = "stpDzAzxP"
+    }
+  },
+  {
+    shift = 30, mask = 3,
+    [0] = {
+      shift = 26, mask = 1,
+      [0] = "ldpDzAzwP", "ldpDzAzsP",
+    },
+    {
+      shift = 26, mask = 1,
+      [0] = "ldpswDAxP", "ldpDzAzdP"
+    },
+    {
+      shift = 26, mask = 1,
+      [0] = "ldpDzAzxP"
+    }
+  }
+}
+
+-- Top-level dispatch for loads and stores. The index combines bit 24 with
+-- bits 28-29 (mask 0x31 applied after shifting by 24). 0x10 is the literal
+-- load table, 0x30 the immediate/register offset table, 0x20/0x21 the pair
+-- table (for bits 23-24 values 1..3) and 0x31 the forms using the "U"
+-- address pattern, selected by bit 26, bits 30-31 and bits 22-23.
+local map_ls = { -- Loads and stores.
+  shift = 24, mask = 0x31,
+  [0x10] = map_lrl, [0x30] = map_lsriro,
+  [0x20] = {
+    shift = 23, mask = 3,
+    map_lsp, map_lsp, map_lsp
+  },
+  [0x21] = {
+    shift = 23, mask = 3,
+    map_lsp, map_lsp, map_lsp
+  },
+  [0x31] = {
+    shift = 26, mask = 1,
+    [0] = {
+      shift = 30, mask = 3,
+      [0] = {
+	shift = 22, mask = 3,
+	[0] = "strbDwzU", "ldrbDwzU"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "strhDwzU", "ldrhDwzU"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "strDwzU", "ldrDwzU"
+      },
+      {
+	shift = 22, mask = 3,
+	[0] = "strDxzU", "ldrDxzU"
+      }
+    },
+    {
+      shift = 30, mask = 3,
+      [2] = {
+	shift = 22, mask = 3,
+	[0] = "strDszU", "ldrDszU"
+      },
+      [3] = {
+	shift = 22, mask = 3,
+	[0] = "strDdzU", "ldrDdzU"
+      }
+    }
+  },
+}
+
+-- Decode table for the floating-point instructions handled by this module.
+-- Only index 1 of bits 28-30 is populated. Below it, bit 24 separates the
+-- 3-source forms (1) from everything else (0, which also requires bit 21
+-- set). Bits 10-11 then select between conversion/1-source/compare/immediate
+-- (0), conditional compare (1), 2-source (2) and conditional select (3).
+-- Most levels require bit 31 and bit 23 to be 0. The 'f' pattern letter
+-- makes the operand handlers pick s or d registers from bit 22.
+local map_datafp = { -- Data processing, SIMD and FP.
+  shift = 28, mask = 7,
+  { -- 001
+    shift = 24, mask = 1,
+    [0] = {
+      shift = 21, mask = 1,
+      {
+	shift = 10, mask = 3,
+	[0] = {
+	  shift = 12, mask = 1,
+	  [0] = {
+	    shift = 13, mask = 1,
+	    [0] = {
+	      shift = 14, mask = 1,
+	      [0] = {
+		shift = 15, mask = 1,
+		[0] = { -- FP/int conversion.
+		  shift = 31, mask = 1,
+		  [0] = {
+		    shift = 16, mask = 0xff,
+		    [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
+		    [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
+		    [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
+		    [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
+		    [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
+		    [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
+		    [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
+		    [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
+		    [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
+		    [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
+		    [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
+		    [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
+		    [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
+		  },
+		  {
+		    shift = 16, mask = 0xff,
+		    [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
+		    [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
+		    [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
+		    [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
+		    [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
+		    [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
+		    [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
+		    [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
+		    [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
+		    [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
+		    [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
+		    [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
+		    [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
+		  }
+		}
+	      },
+	      { -- FP data-processing, 1 source.
+		shift = 31, mask = 1,
+		[0] = {
+		  shift = 22, mask = 3,
+		  [0] = {
+		    shift = 15, mask = 63,
+		    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
+		    "fsqrtDNf", false, "fcvtDdNs", false, false,
+		    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
+		    "frintaDNf", false, "frintxDNf", "frintiDNf",
+		  },
+		  {
+		    shift = 15, mask = 63,
+		    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
+		    "fsqrtDNf", "fcvtDsNd", false, false, false,
+		    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
+		    "frintaDNf", false, "frintxDNf", "frintiDNf",
+		  }
+		}
+	      }
+	    },
+	    { -- FP compare.
+	      shift = 31, mask = 1,
+	      [0] = {
+		shift = 14, mask = 3,
+		[0] = {
+		  shift = 23, mask = 1,
+		  [0] = {
+		    shift = 0, mask = 31,
+		    [0] = "fcmpNMf", [8] = "fcmpNZf",
+		    [16] = "fcmpeNMf", [24] = "fcmpeNZf",
+		  }
+		}
+	      }
+	    }
+	  },
+	  { -- FP immediate.
+	    shift = 31, mask = 1,
+	    [0] = {
+	      shift = 5, mask = 31,
+	      [0] = {
+		shift = 23, mask = 1,
+		[0] = "fmovDFf"
+	      }
+	    }
+	  }
+	},
+	{ -- FP conditional compare.
+	  shift = 31, mask = 1,
+	  [0] = {
+	    shift = 23, mask = 1,
+	    [0] = {
+	      shift = 4, mask = 1,
+	      [0] = "fccmpNMVCf", "fccmpeNMVCf"
+	    }
+	  }
+	},
+	{ -- FP data-processing, 2 sources.
+	  shift = 31, mask = 1,
+	  [0] = {
+	    shift = 23, mask = 1,
+	    [0] = {
+	      shift = 12, mask = 15,
+	      [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
+	      "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
+	      "fnmulDNMf"
+	    }
+	  }
+	},
+	{ -- FP conditional select.
+	  shift = 31, mask = 1,
+	  [0] = {
+	    shift = 23, mask = 1,
+	    [0] = "fcselDNMCf"
+	  }
+	}
+      }
+    },
+    { -- FP data-processing, 3 sources.
+      shift = 31, mask = 1,
+      [0] = {
+	shift = 15, mask = 1,
+	[0] = {
+	  shift = 21, mask = 5,
+	  [0] = "fmaddDNMAf", "fnmaddDNMAf"
+	},
+	{
+	  shift = 21, mask = 5,
+	  [0] = "fmsubDNMAf", "fnmsubDNMAf"
+	}
+      }
+    }
+  }
+}
+
+-- Decode table for branches, indexed by bits 29-31. Index 0 is "b" and
+-- index 4 is "bl"; indexes 1 and 5 hold the compare/test-and-branch forms
+-- (tbz/tbnz use the 'w' width at index 1 and 'x' at index 5). Index 2 holds
+-- the conditional branches, selected by the low 4 bits (there is no entry
+-- for condition 15). Index 6 matches brk, nop and br/blr/ret by comparing
+-- a masked copy of the whole low 24 bits (shift = 0) against fixed values.
+local map_br = { -- Branches, exception generating and system instructions.
+  shift = 29, mask = 7,
+  [0] = "bB",
+  { -- Compare & branch, immediate.
+    shift = 24, mask = 3,
+    [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
+  },
+  { -- Conditional branch, immediate.
+    shift = 24, mask = 3,
+    [0] = {
+      shift = 4, mask = 1,
+      [0] = {
+	shift = 0, mask = 15,
+	[0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
+	"bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
+      }
+    }
+  }, false, "blB",
+  { -- Compare & branch, immediate.
+    shift = 24, mask = 3,
+    [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
+  },
+  {
+    shift = 24, mask = 3,
+    [0] = { -- Exception generation.
+      shift = 0, mask = 0xe0001f,
+      [0x200000] = "brkW"
+    },
+    { -- System instructions.
+      shift = 0, mask = 0x3fffff,
+      [0x03201f] = "nop"
+    },
+    { -- Unconditional branch, register.
+      shift = 0, mask = 0xfffc1f,
+      [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
+      [0x5f0000] = "retNx"
+    },
+  }
+}
+
+-- Root of the decode tree: disass_ins() indexes it with bits 25-28 of the
+-- instruction word. Indexes 0..3 are unassigned (false) and therefore
+-- decode as unknown opcodes.
+local map_init = {
+  shift = 25, mask = 15,
+  [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
+  map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
+}
+
+------------------------------------------------------------------------------
+
+-- Register name tables, keyed by width letter ("x", "w", "d", "s") and then
+-- by register number 0..31. Number 31 of the integer banks is named after
+-- the stack pointer; the "z" pattern handler renames it to the zero
+-- register where needed.
+local map_regs = {}
+
+for _, bank in ipairs({ "x", "w", "d", "s" }) do
+  local names = {}
+  for num = 0, 31 do
+    names[num] = bank..num
+  end
+  map_regs[bank] = names
+end
+map_regs.x[31] = "sp"
+map_regs.w[31] = "wsp"
+
+-- Condition code names, indexed by the 4 bit condition field (0..14).
+-- There is no entry for index 15, so a lookup for that value yields nil.
+local map_cond = {
+  [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+  "hi", "ls", "ge", "lt", "gt", "le", "al",
+}
+
+-- Shift type names, indexed by the 2 bit shift field (bits 22-23) that the
+-- "S" pattern handler reads. All four values need an entry: type 3 (ror) is
+-- used by the logical shifted-register instructions, and a missing entry
+-- makes the handler concatenate nil and raise an error.
+local map_shift = { [0] = "lsl", "lsr", "asr", "ror", }
+
+-- Extend operator names, indexed by the 3 bit option field (bits 13-15)
+-- read by the "O" and "X" pattern handlers.
+local map_extend = {
+  [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
+}
+
+------------------------------------------------------------------------------
+
+-- Emit one formatted disassembly line through ctx.out and advance ctx.pos
+-- by one instruction (4 bytes). The line shows the address, the raw opcode
+-- in hex if ctx.hexdump > 0, the mnemonic and the comma-separated operands.
+-- If ctx.rel has an entry in ctx.symtab, the symbol is appended as "->sym".
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local sym = ctx.rel and ctx.symtab[ctx.rel]
+  local extra = sym and "\t->"..sym or ""
+  local line
+  if ctx.hexdump > 0 then
+    line = format("%08x  %s  %-5s %s%s\n",
+      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)
+  else
+    line = format("%08x  %-5s %s%s\n",
+      ctx.addr+pos, text, concat(operands, ", "), extra)
+  end
+  ctx.out(line)
+  ctx.pos = pos + 4
+end
+
+-- Print an undecodable instruction word as a raw ".long 0x........" line.
+local function unknown(ctx)
+  local raw = "0x"..tohex(ctx.op)
+  return putop(ctx, ".long", { raw })
+end
+
+-- Return the name of register regnum for pattern letter p. The register
+-- bank is the first width letter (x, w, d or s) that follows p in pat,
+-- skipping as few alphanumeric characters as possible.
+local function match_reg(p, pat, regnum)
+  local bank = match(pat, p.."%w-([xwds])")
+  return map_regs[bank][regnum]
+end
+
+-- Format x as lower-case hex. Negative values go through tohex() (fixed
+-- 8 digits); everything else is printed with "%x" without leading zeros.
+local function fmt_hex32(x)
+  return x < 0 and tohex(x) or format("%x", x)
+end
+
+-- Multipliers that replicate a 2, 4, 8, 16 or 32 bit element across 32 bits,
+-- indexed by the 'len' value computed in decode_imm13().
+local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }
+
+-- Decode the logical immediate of instruction word op and return it as a
+-- hex string without a "0x" prefix. imms is bits 10-15, immr is bits 16-21
+-- and bit 22 selects between the two decoding paths below.
+local function decode_imm13(op)
+  local imms = band(rshift(op, 10), 63)
+  local immr = band(rshift(op, 16), 63)
+  if band(op, 0x00400000) == 0 then
+    -- Bit 22 clear: the element size is derived from the leading bits of imms.
+    local len = 5
+    if imms >= 56 then
+      if imms >= 60 then len = 1 else len = 2 end
+    elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
+    local l = lshift(1, len)-1
+    local s = band(imms, l)
+    local r = band(immr, l)
+    -- s+1 low bits set, rotated right by r.
+    local imm = ror(rshift(-1, 31-s), r)
+    -- For elements narrower than 32 bits, fold the bits that the 32 bit
+    -- rotate moved to the top back into the element.
+    if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
+    -- Replicate the element across the 32 bit word.
+    imm = imm * imm13_rep[len]
+    local ix = fmt_hex32(imm)
+    -- With bit 31 of op set the 32 bit pattern is printed twice.
+    if rshift(op, 31) ~= 0 then
+      return ix..tohex(imm)
+    else
+      return ix
+    end
+  else
+    -- Bit 22 set: build the value as two 32 bit halves (lo, hi).
+    local lo, hi = -1, 0
+    if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
+    if immr ~= 0 then
+      -- Rotate both halves, then exchange the bits that crossed the
+      -- boundary between them.
+      lo, hi = ror(lo, immr), ror(hi, immr)
+      local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
+      lo, hi = bxor(lo, x), bxor(hi, x)
+      if immr >= 32 then lo, hi = hi, lo end
+    end
+    if hi ~= 0 then
+      return fmt_hex32(hi)..tohex(lo)
+    else
+      return fmt_hex32(lo)
+    end
+  end
+end
+
+-- Extract the signed PC-relative immediate of op. The field layout depends
+-- on the mnemonic: b/bl use the 26 bit field at bit 0, tbz/tbnz the 14 bit
+-- field at bit 5 and everything else the 19 bit field at bit 5. All of
+-- these are scaled by 4; adr/adrp additionally merge in bits 29-30.
+local function parse_immpc(op, name)
+  if name == "b" or name == "bl" then
+    return arshift(lshift(op, 6), 4)
+  end
+  if name == "tbz" or name == "tbnz" then
+    return lshift(arshift(lshift(op, 13), 18), 2)
+  end
+  local imm19 = lshift(arshift(lshift(op, 8), 13), 2)
+  if name == "adr" or name == "adrp" then
+    local immlo = band(rshift(op, 29), 3)
+    return bor(imm19, immlo)
+  end
+  return imm19
+end
+
+-- Expand the 8 bit floating-point immediate (bits 13-20 of op) into a Lua
+-- number: bit 20 is the sign, the low 4 bits form the fraction (with an
+-- implicit leading 16) and the bits in between give the exponent.
+local function parse_fpimm8(op)
+  local frac = band(rshift(op, 13), 15) + 16
+  local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
+  local sign = 1
+  if band(op, 0x100000) ~= 0 then sign = -1 end
+  return sign * frac * 2^exp
+end
+
+-- Decide whether a bitfield move should be shown with its bitfield-extract
+-- alias. Returns false when imms < immr, when imms is 31 or 63, or when
+-- immr is 0 and imms is 7 or 15 while either sf or uns is 0 (the "2"
+-- pattern handler then selects a different alias). Returns true otherwise.
+local function prefer_bfx(sf, uns, imms, immr)
+  if imms < immr or imms == 31 or imms == 63 then
+    return false
+  end
+  local byte_or_half = imms == 7 or imms == 15
+  if immr == 0 and byte_or_half and (sf == 0 or uns == 0) then
+    return false
+  end
+  return true
+end
+
+-- Disassemble a single instruction.
+-- Reads the 32 bit little-endian word at ctx.pos, walks the decode tables
+-- from map_init down to a template string and expands each pattern letter of
+-- the template into an operand. The line is emitted via putop(), which
+-- advances ctx.pos by 4. Words without a template are printed by unknown().
+--
+-- A template is "<name>[|<alias>...]<pattern>". Some of the pattern letters:
+--   D N M A  register from bits 0-4, 5-9, 16-20 and 10-14
+--   g f      select the register bank for D/N/M/A from bit 31 resp. bit 22
+--   x w d s  explicit register bank for the preceding D/N/M/A
+--   B        PC-relative target, also stored in ctx.rel for symbol lookup
+--   z        rename a preceding sp/wsp operand to xzr/wzr
+--   0        drop a preceding sp/wsp operand and switch to the alias name
+local function disass_ins(ctx)
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+  local operands = {}
+  local suffix = ""
+  local last, name, pat
+  local map_reg
+  ctx.op = op
+  ctx.rel = nil
+  last = nil
+  local opat
+  opat = map_init[band(rshift(op, 25), 15)]
+  -- Descend through the nested tables until a template string is reached.
+  while type(opat) ~= "string" do
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+  end
+  -- Split the template into mnemonic, optional alias list and pattern.
+  name, pat = match(opat, "^([a-z0-9]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  if altname then pat = pat2 end
+  if sub(pat, 1, 1) == "." then
+    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
+    suffix = suffix..s2
+    pat = p2
+  end
+
+  -- 'g' or 'f' in the pattern fixes one register bank for all operands.
+  local rt = match(pat, "[gf]")
+  if rt then
+    if rt == "g" then
+      map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
+    else
+      map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
+    end
+  end
+
+  local second0, immr
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      local regnum = band(op, 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "N" then
+      local regnum = band(rshift(op, 5), 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "M" then
+      local regnum = band(rshift(op, 16), 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "A" then
+      local regnum = band(rshift(op, 10), 31)
+      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+    elseif p == "B" then
+      local addr = ctx.addr + pos + parse_immpc(op, name)
+      ctx.rel = addr
+      x = "0x"..tohex(addr)
+    elseif p == "T" then
+      x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
+    elseif p == "V" then
+      x = band(op, 15)
+    elseif p == "C" then
+      x = map_cond[band(rshift(op, 12), 15)]
+    elseif p == "c" then
+      local rn = band(rshift(op, 5), 31)
+      local rm = band(rshift(op, 16), 31)
+      local cond = band(rshift(op, 12), 15)
+      local invc = bxor(cond, 1)
+      x = map_cond[cond]
+      if altname and cond ~= 14 and cond ~= 15 then
+	local a1, a2 = match(altname, "([^|]*)|(.*)")
+	if rn == rm then
+	  local n = #operands
+	  operands[n] = nil
+	  x = map_cond[invc]
+	  if rn ~= 31 then
+	    if a1 then name = a1 else name = altname end
+	  else
+	    operands[n-1] = nil
+	    name = a2
+	  end
+	end
+      end
+    elseif p == "W" then
+      x = band(rshift(op, 5), 0xffff)
+    elseif p == "Y" then
+      x = band(rshift(op, 5), 0xffff)
+      local hw = band(rshift(op, 21), 3)
+      if altname and (hw == 0 or x ~= 0) then
+	name = altname
+      end
+    elseif p == "L" then
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local imm9 = arshift(lshift(op, 11), 23)
+      if band(op, 0x800) ~= 0 then
+	x = "["..rn..", #"..imm9.."]!"
+      else
+	x = "["..rn.."], #"..imm9
+      end
+    elseif p == "U" then
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local sz = band(rshift(op, 30), 3)
+      -- The 12 bit offset (bits 10-21) is unsigned, so it must be extracted
+      -- with a logical shift. An arithmetic shift would print offsets with
+      -- bit 21 set as negative numbers.
+      local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
+      if imm12 ~= 0 then
+	x = "["..rn..", #"..imm12.."]"
+      else
+	x = "["..rn.."]"
+      end
+    elseif p == "K" then
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local imm9 = arshift(lshift(op, 11), 23)
+      if imm9 ~= 0 then
+	x = "["..rn..", #"..imm9.."]"
+      else
+	x = "["..rn.."]"
+      end
+    elseif p == "O" then
+      local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
+      local m = band(rshift(op, 13), 1)
+      if m == 0 then
+	rm = map_regs.w[band(rshift(op, 16), 31)]
+      else
+	rm = map_regs.x[band(rshift(op, 16), 31)]
+      end
+      x = "["..rn..", "..rm
+      local opt = band(rshift(op, 13), 7)
+      local s = band(rshift(op, 12), 1)
+      local sz = band(rshift(op, 30), 3)
+      -- extension to be applied
+      if opt == 3 then
+       if s == 0 then x = x.."]"
+       else x = x..", lsl #"..sz.."]" end
+      elseif opt == 2 or opt == 6 or opt == 7 then
+	if s == 0 then x = x..", "..map_extend[opt].."]"
+	else x = x..", "..map_extend[opt].." #"..sz.."]" end
+      else
+	x = x.."]"
+      end
+    elseif p == "P" then
+      local opcv, sh = rshift(op, 26), 2
+      if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
+      local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
+      local rn = map_regs.x[band(rshift(op, 5), 31)]
+      local ind = band(rshift(op, 23), 3)
+      if ind == 1 then
+	x = "["..rn.."], #"..imm7
+      elseif ind == 2 then
+	if imm7 == 0 then
+	  x = "["..rn.."]"
+	else
+	  x = "["..rn..", #"..imm7.."]"
+	end
+      elseif ind == 3 then
+	x = "["..rn..", #"..imm7.."]!"
+      end
+    elseif p == "I" then
+      local shf = band(rshift(op, 22), 3)
+      local imm12 = band(rshift(op, 10), 0x0fff)
+      local rn, rd = band(rshift(op, 5), 31), band(op, 31)
+      if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
+	name = altname
+	x = nil
+      elseif shf == 0 then
+	x = imm12
+      elseif shf == 1 then
+	x = imm12..", lsl #12"
+      end
+    elseif p == "i" then
+      x = "#0x"..decode_imm13(op)
+    elseif p == "1" then
+      immr = band(rshift(op, 16), 63)
+      x = immr
+    elseif p == "2" then
+      x = band(rshift(op, 10), 63)
+      if altname then
+	local a1, a2, a3, a4, a5, a6 =
+	  match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
+	local sf = band(rshift(op, 26), 32)
+	local uns = band(rshift(op, 30), 1)
+	if prefer_bfx(sf, uns, x, immr) then
+	  name = a2
+	  x = x - immr + 1
+	elseif immr == 0 and x == 7 then
+	  local n = #operands
+	  operands[n] = nil
+	  if sf ~= 0 then
+	    operands[n-1] = gsub(operands[n-1], "x", "w")
+	  end
+	  last = operands[n-1]
+	  name = a6
+	  x = nil
+	elseif immr == 0 and x == 15 then
+	  local n = #operands
+	  operands[n] = nil
+	  if sf ~= 0 then
+	    operands[n-1] = gsub(operands[n-1], "x", "w")
+	  end
+	  last = operands[n-1]
+	  name = a5
+	  x = nil
+	elseif x == 31 or x == 63 then
+	  if x == 31 and immr == 0 and name == "sbfm" then
+	    name = a4
+	    local n = #operands
+	    operands[n] = nil
+	    if sf ~= 0 then
+	      operands[n-1] = gsub(operands[n-1], "x", "w")
+	    end
+	    last = operands[n-1]
+	  else
+	    name = a3
+	  end
+	  x = nil
+	elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
+	  name = a4
+	  last = "#"..(sf+32 - immr)
+	  operands[#operands] = last
+	  x = nil
+	elseif x < immr then
+	  name = a1
+	  last = "#"..(sf+32 - immr)
+	  operands[#operands] = last
+	  x = x + 1
+	end
+      end
+    elseif p == "3" then
+      x = band(rshift(op, 10), 63)
+      if altname then
+	local a1, a2 = match(altname, "([^|]*)|(.*)")
+	if x < immr then
+	  name = a1
+	  local sf = band(rshift(op, 26), 32)
+	  last = "#"..(sf+32 - immr)
+	  operands[#operands] = last
+	  x = x + 1
+	elseif x >= immr then
+	  name = a2
+	  x = x - immr + 1
+	end
+      end
+    elseif p == "4" then
+      x = band(rshift(op, 10), 63)
+      local rn = band(rshift(op, 5), 31)
+      local rm = band(rshift(op, 16), 31)
+      if altname and rn == rm then
+	local n = #operands
+	operands[n] = nil
+	last = operands[n-1]
+	name = altname
+      end
+    elseif p == "5" then
+      x = band(rshift(op, 16), 31)
+    elseif p == "S" then
+      x = band(rshift(op, 10), 63)
+      if x == 0 then x = nil
+      else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
+    elseif p == "X" then
+      local opt = band(rshift(op, 13), 7)
+      -- Width specifier <R>.
+      if opt ~= 3 and opt ~= 7 then
+	last = map_regs.w[band(rshift(op, 16), 31)]
+	operands[#operands] = last
+      end
+      x = band(rshift(op, 10), 7)
+      -- Extension.
+      if opt == 2 + band(rshift(op, 31), 1) and
+	 band(rshift(op, second0 and 5 or 0), 31) == 31 then
+	if x == 0 then x = nil
+	else x = "lsl #"..x end
+      else
+	if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
+	else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
+      end
+    elseif p == "R" then
+      x = band(rshift(op,21), 3)
+      if x == 0 then x = nil
+      else x = "lsl #"..x*16 end
+    elseif p == "z" then
+      local n = #operands
+      if operands[n] == "sp" then operands[n] = "xzr"
+      elseif operands[n] == "wsp" then operands[n] = "wzr"
+      end
+    elseif p == "Z" then
+      x = 0
+    elseif p == "F" then
+      x = parse_fpimm8(op)
+    elseif p == "g" or p == "f" or p == "x" or p == "w" or
+	   p == "d" or p == "s" then
+      -- These are handled in D/N/M/A.
+    elseif p == "0" then
+      if last == "sp" or last == "wsp" then
+	local n = #operands
+	operands[n] = nil
+	last = operands[n-1]
+	if altname then
+	  local a1, a2 = match(altname, "([^|]*)|(.*)")
+	  if not a1 then
+	    name = altname
+	  elseif second0 then
+	    name, altname = a2, a1
+	  else
+	    name, altname = a1, a2
+	  end
+	end
+      end
+      second0 = true
+    else
+      assert(false)
+    end
+    -- Append the operand produced by this letter; numbers become "#n".
+    if x then
+      last = x
+      if type(x) == "number" then x = "#"..x end
+      operands[#operands+1] = x
+    end
+  end
+
+  return putop(ctx, name..suffix, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble the instructions of ctx.code from byte offset ofs (default 0)
+-- up to ofs+len, or up to the end of the code if len is not given.
+local function disass_block(ctx, ofs, len)
+  ofs = ofs or 0
+  local stop
+  if len then
+    stop = ofs + len
+  else
+    stop = #ctx.code
+  end
+  ctx.pos = ofs
+  ctx.rel = nil
+  while ctx.pos < stop do
+    disass_ins(ctx)
+  end
+end
+
+-- Extended API: build a disassembler context for the machine code string
+-- 'code'. addr defaults to 0 and out defaults to io.write. Call
+-- ctx:disass(ofs, len) on the result to produce output.
+local function create(code, addr, out)
+  return {
+    code = code,
+    addr = addr or 0,
+    out = out or io.write,
+    symtab = {},
+    disass = disass_block,
+    hexdump = 8,
+  }
+end
+
+-- Simple API: disassemble the whole string 'code', loaded at address addr,
+-- writing the listing through out.
+local function disass(code, addr, out)
+  local ctx = create(code, addr, out)
+  ctx:disass()
+end
+
+-- Return the register name for register ID r: IDs below 32 map to the
+-- x register table, all others to the d register table (offset by 32).
+local function regname(r)
+  local is_gpr = r < 32
+  if is_gpr then
+    return map_regs.x[r]
+  else
+    return map_regs.d[r-32]
+  end
+end
+
+-- Public module interface: the context constructor, the one-shot
+-- disassembler and the register name lookup.
+local exports = {}
+exports.create = create
+exports.disass = disass
+exports.regname = regname
+return exports
+

+ 12 - 0
love/src/jni/LuaJIT-2.1/src/jit/dis_arm64be.lua

@@ -0,0 +1,12 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM64BE disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- ARM64 instructions are always little-endian. So just forward to the
+-- common ARM64 disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+-- Resolve dis_arm64 relative to the package prefix of this module's name.
+local prefix = string.match(..., ".*%.") or ""
+return require(prefix.."dis_arm64")
+

+ 35 - 20
love/src/jni/LuaJIT-2.1/src/jit/dis_mips.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS disassembler module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -11,8 +11,8 @@
 ------------------------------------------------------------------------------
 
 local type = type
-local sub, byte, format = string.sub, string.byte, string.format
-local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local byte, format = string.byte, string.format
+local match, gmatch = string.match, string.gmatch
 local concat = table.concat
 local bit = require("bit")
 local band, bor, tohex = bit.band, bit.bor, bit.tohex
@@ -34,15 +34,17 @@ local map_special = {
   "jrS",	"jalrD1S",	"movzDST",	"movnDST",
   "syscallY",	"breakY",	false,		"sync",
   "mfhiD",	"mthiS",	"mfloD",	"mtloS",
-  false,	false,		false,		false,
+  "dsllvDST",	false,		"dsrlvDST",	"dsravDST",
   "multST",	"multuST",	"divST",	"divuST",
-  false,	false,		false,		false,
+  "dmultST",	"dmultuST",	"ddivST",	"ddivuST",
   "addDST",	"addu|moveDST0", "subDST",	"subu|neguDS0T",
-  "andDST",	"orDST",	"xorDST",	"nor|notDST0",
+  "andDST",	"or|moveDST0",	"xorDST",	"nor|notDST0",
   false,	false,		"sltDST",	"sltuDST",
-  false,	false,		false,		false,
+  "daddDST",	"dadduDST",	"dsubDST",	"dsubuDST",
   "tgeSTZ",	"tgeuSTZ",	"tltSTZ",	"tltuSTZ",
-  "teqSTZ",	false,		"tneSTZ",
+  "teqSTZ",	false,		"tneSTZ",	false,
+  "dsllDTA",	false,		"dsrlDTA",	"dsraDTA",
+  "dsll32DTA",	false,		"dsrl32DTA",	"dsra32DTA",
 }
 
 local map_special2 = {
@@ -60,11 +62,17 @@ local map_bshfl = {
   [24] = "sehDT",
 }
 
+local map_dbshfl = {
+  shift = 6, mask = 31,
+  [2] = "dsbhDT",
+  [5] = "dshdDT",
+}
+
 local map_special3 = {
   shift = 0, mask = 63,
-  [0] = "extTSAK", [4] = "insTSAL",
-  [32] = map_bshfl,
-  [59] = "rdhwrTD",
+  [0]  = "extTSAK", [1]  = "dextmTSAP", [3]  = "dextTSAK",
+  [4]  = "insTSAL", [6]  = "dinsuTSEQ", [7]  = "dinsTSAL",
+  [32] = map_bshfl, [36] = map_dbshfl,  [59] = "rdhwrTD",
 }
 
 local map_regimm = {
@@ -178,8 +186,8 @@ local map_cop1bc = {
 
 local map_cop1 = {
   shift = 21, mask = 31,
-  [0] = "mfc1TG", false,	"cfc1TG",	"mfhc1TG",
-  "mtc1TG",	false,		"ctc1TG",	"mthc1TG",
+  [0] = "mfc1TG", "dmfc1TG",	"cfc1TG",	"mfhc1TG",
+  "mtc1TG",	"dmtc1TG",	"ctc1TG",	"mthc1TG",
   map_cop1bc,	false,		false,		false,
   false,	false,		false,		false,
   map_cop1s,	map_cop1d,	false,		false,
@@ -213,16 +221,16 @@ local map_pri = {
   "andiTSU",	"ori|liTS0U",	"xoriTSU",	"luiTU",
   map_cop0,	map_cop1,	false,		map_cop1x,
   "beql|beqzlST0B",	"bnel|bnezlST0B",	"blezlSB",	"bgtzlSB",
-  false,	false,		false,		false,
-  map_special2,	false,		false,		map_special3,
+  "daddiTSI",	"daddiuTSI",	false,		false,
+  map_special2,	"jalxJ",	false,		map_special3,
   "lbTSO",	"lhTSO",	"lwlTSO",	"lwTSO",
   "lbuTSO",	"lhuTSO",	"lwrTSO",	false,
   "sbTSO",	"shTSO",	"swlTSO",	"swTSO",
   false,	false,		"swrTSO",	"cacheNSO",
   "llTSO",	"lwc1HSO",	"lwc2TSO",	"prefNSO",
-  false,	"ldc1HSO",	"ldc2TSO",	false,
+  false,	"ldc1HSO",	"ldc2TSO",	"ldTSO",
   "scTSO",	"swc1HSO",	"swc2TSO",	false,
-  false,	"sdc1HSO",	"sdc2TSO",	false,
+  false,	"sdc1HSO",	"sdc2TSO",	"sdTSO",
 }
 
 ------------------------------------------------------------------------------
@@ -306,6 +314,8 @@ local function disass_ins(ctx)
       x = "f"..band(rshift(op, 21), 31)
     elseif p == "A" then
       x = band(rshift(op, 6), 31)
+    elseif p == "E" then
+      x = band(rshift(op, 6), 31) + 32
     elseif p == "M" then
       x = band(rshift(op, 11), 31)
     elseif p == "N" then
@@ -315,8 +325,12 @@ local function disass_ins(ctx)
       if x == 0 then x = nil end
     elseif p == "K" then
       x = band(rshift(op, 11), 31) + 1
+    elseif p == "P" then
+      x = band(rshift(op, 11), 31) + 33
     elseif p == "L" then
       x = band(rshift(op, 11), 31) - last + 1
+    elseif p == "Q" then
+      x = band(rshift(op, 11), 31) - last + 33
     elseif p == "I" then
       x = arshift(lshift(op, 16), 16)
     elseif p == "U" then
@@ -330,11 +344,12 @@ local function disass_ins(ctx)
     elseif p == "B" then
       x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
       ctx.rel = x
-      x = "0x"..tohex(x)
+      x = format("0x%08x", x)
     elseif p == "J" then
-      x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4
+      local a = ctx.addr + ctx.pos
+      x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
       ctx.rel = x
-      x = "0x"..tohex(x)
+      x = format("0x%08x", x)
     elseif p == "V" then
       x = band(rshift(op, 8), 7)
       if x == 0 then x = nil end

+ 17 - 0
love/src/jni/LuaJIT-2.1/src/jit/dis_mips64.lua

@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the big-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+-- Load dis_mips from the same package prefix as this module and re-export
+-- its default entry points.
+local prefix = string.match(..., ".*%.") or ""
+local dis_mips = require(prefix.."dis_mips")
+local exports = {}
+exports.create = dis_mips.create
+exports.disass = dis_mips.disass
+exports.regname = dis_mips.regname
+return exports
+

+ 17 - 0
love/src/jni/LuaJIT-2.1/src/jit/dis_mips64el.lua

@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64EL disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the little-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+-- Load dis_mips from the same package prefix as this module and re-export
+-- its *_el entry points under the standard names.
+local prefix = string.match(..., ".*%.") or ""
+local dis_mips = require(prefix.."dis_mips")
+local exports = {}
+exports.create = dis_mips.create_el
+exports.disass = dis_mips.disass_el
+exports.regname = dis_mips.regname
+return exports
+

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/jit/dis_mipsel.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPSEL disassembler wrapper module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the little-endian functions from the

+ 2 - 2
love/src/jni/LuaJIT-2.1/src/jit/dis_ppc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT PPC disassembler module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -13,7 +13,7 @@
 ------------------------------------------------------------------------------
 
 local type = type
-local sub, byte, format = string.sub, string.byte, string.format
+local byte, format = string.byte, string.format
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local concat = table.concat
 local bit = require("bit")

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/jit/dis_x64.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT x64 disassembler wrapper module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the 64 bit functions from the combined

+ 37 - 7
love/src/jni/LuaJIT-2.1/src/jit/dis_x86.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT x86/x64 disassembler module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -158,8 +158,8 @@ local map_opc2 = {
 "||punpcklqdqXrvm","||punpckhqdqXrvm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 --7x
-"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
-"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
+"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
 "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 nil,nil,
@@ -239,8 +239,30 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
 --8x
 [0x8c] = "||pmaskmovXrvVSm",
 [0x8e] = "||pmaskmovVSmXvr",
+--9x
+[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
+[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
+[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
+[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
+[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
+--Ax
+[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
+[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
+[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
+[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
+[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
+--Bx
+[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
+[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
+[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
+[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
+[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
+--Dx
+[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
 --Fx
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
+[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
 },
 
 ["3a"] = { -- [66] 0f 3a xx
@@ -262,12 +284,16 @@ nil,nil,nil,nil,
 [0x40] = "||dppsXrvmu",
 [0x41] = "||dppdXrvmu",
 [0x42] = "||mpsadbwXrvmu",
+[0x44] = "||pclmulqdqXrvmu",
 [0x46] = "||perm2i128Xrvmu",
 [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
 [0x4c] = "||pblendvbXrvmb",
 --6x
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+[0xdf] = "||aeskeygenassistXrmu",
+--Fx
+[0xf0] = "||| rorxVrmu",
 },
 }
 
@@ -409,8 +435,8 @@ local function putop(ctx, text, operands)
 	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
 	      (ctx.vexl and "l" or "")
     if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
-    if t ~= "" then text = ctx.rex.."."..t.." "..text
-    elseif ctx.rex == "vex" then text = "v"..text end
+    if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
+    elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
     ctx.rex = false; ctx.vexl = false; ctx.vexv = false
   end
@@ -475,7 +501,7 @@ local function putpat(ctx, name, pat)
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
   local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
 
-  -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
+  -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
   for p in gmatch(pat, ".") do
     local x = nil
     if p == "V" or p == "U" then
@@ -498,6 +524,9 @@ local function putpat(ctx, name, pat)
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
       if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
+    elseif p == "H" then
+      name = name..(ctx.rexw and "d" or "s")
+      ctx.rexw = false
     elseif p == "S" then
       name = name..lower(sz)
     elseif p == "s" then
@@ -727,6 +756,7 @@ map_act = {
   V = putpat, U = putpat, T = putpat,
   M = putpat, X = putpat, P = putpat,
   F = putpat, G = putpat, Y = putpat,
+  H = putpat,
 
   -- Collect prefixes.
   [":"] = function(ctx, name, pat)
@@ -810,7 +840,7 @@ map_act = {
       m = b%32; b = (b-m)/32
       local nb = b%2; b = (b-nb)/2
       if nb == 0 then ctx.rexb = true end
-      local nx = b%2; b = (b-nx)/2
+      local nx = b%2
       if nx == 0 then ctx.rexx = true end
       b = byte(ctx.code, pos, pos)
       if not b then return incomplete(ctx) end

+ 17 - 12
love/src/jni/LuaJIT-2.1/src/jit/dump.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT compiler dump module.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -63,9 +63,9 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local bit = require("bit")
-local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
+local band, shr, tohex = bit.band, bit.rshift, bit.tohex
 local sub, gsub, format = string.sub, string.gsub, string.format
-local byte, char, rep = string.byte, string.char, string.rep
+local byte, rep = string.byte, string.rep
 local type, tostring = type, tostring
 local stdout, stderr = io.stdout, io.stderr
 
@@ -85,7 +85,7 @@ local nexitsym = 0
 local function fillsymtab_tr(tr, nexit)
   local t = {}
   symtabmt.__index = t
-  if jit.arch == "mips" or jit.arch == "mipsel" then
+  if jit.arch:sub(1, 4) == "mips" then
     t[traceexitstub(tr, 0)] = "exit"
     return
   end
@@ -213,7 +213,7 @@ local colortype_ansi = {
   "\027[35m%s\027[m",
 }
 
-local function colorize_text(s, t)
+local function colorize_text(s)
   return s
 end
 
@@ -310,15 +310,17 @@ local function fmtfunc(func, pc)
   end
 end
 
-local function formatk(tr, idx)
+local function formatk(tr, idx, sn)
   local k, t, slot = tracek(tr, idx)
   local tn = type(k)
   local s
   if tn == "number" then
-    if k == 2^52+2^51 then
+    if band(sn or 0, 0x30000) ~= 0 then
+      s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
+    elseif k == 2^52+2^51 then
       s = "bias"
     else
-      s = format("%+.14g", k)
+      s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
     end
   elseif tn == "string" then
     s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -331,11 +333,13 @@ local function formatk(tr, idx)
       s = format("userdata:%p", k)
     else
       s = format("[%p]", k)
-      if s == "[0x00000000]" then s = "NULL" end
+      if s == "[NULL]" then s = "NULL" end
     end
   elseif t == 21 then -- int64_t
     s = sub(tostring(k), 1, -3)
     if sub(s, 1, 1) ~= "-" then s = "+"..s end
+  elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
+    return "----" -- Special case for LJ_FR2 slot 1.
   else
     s = tostring(k) -- For primitives.
   end
@@ -354,7 +358,7 @@ local function printsnap(tr, snap)
       n = n + 1
       local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
       if ref < 0 then
-	out:write(formatk(tr, ref))
+	out:write(formatk(tr, ref, sn))
       elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
 	out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
       else
@@ -552,7 +556,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
   if what == "start" then
     if dumpmode.H then out:write('<pre class="ljdump">\n') end
     out:write("---- TRACE ", tr, " ", what)
-    if otr then out:write(" ", otr, "/", oex) end
+    if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
     out:write(" ", fmtfunc(func, pc), "\n")
   elseif what == "stop" or what == "abort" then
     out:write("---- TRACE ", tr, " ", what)
@@ -651,7 +655,8 @@ end
 local function dumpon(opt, outfile)
   if active then dumpoff() end
 
-  local colormode = os.getenv("COLORTERM") and "A" or "T"
+  local term = os.getenv("TERM")
+  local colormode = (term and term:match("color") or os.getenv("COLORTERM")) and "A" or "T"
   if opt then
     opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end)
   end

+ 3 - 2
love/src/jni/LuaJIT-2.1/src/jit/p.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT profiler.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -120,7 +120,7 @@ end
 -- Show top N list.
 local function prof_top(count1, count2, samples, indent)
   local t, n = {}, 0
-  for k, v in pairs(count1) do
+  for k in pairs(count1) do
     n = n + 1
     t[n] = k
   end
@@ -156,6 +156,7 @@ local function prof_annotate(count1, samples)
     ms = math.max(ms, v)
     if pct >= prof_min then
       local file, line = k:match("^(.*):(%d+)$")
+      if not file then file = k; line = 0 end
       local fl = files[file]
       if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
       line = tonumber(line)

+ 2 - 2
love/src/jni/LuaJIT-2.1/src/jit/v.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- Verbose mode of the LuaJIT compiler.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -99,7 +99,7 @@ end
 local function dump_trace(what, tr, func, pc, otr, oex)
   if what == "start" then
     startloc = fmtfunc(func, pc)
-    startex = otr and "("..otr.."/"..oex..") " or ""
+    startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
   else
     if what == "abort" then
       local loc = fmtfunc(func, pc)

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/jit/zone.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT profiler zones.
 --
--- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --

+ 14 - 20
love/src/jni/LuaJIT-2.1/src/lauxlib.h

@@ -15,9 +15,6 @@
 #include "lua.h"
 
 
-#define luaL_getn(L,i)          ((int)lua_objlen(L, i))
-#define luaL_setn(L,i,j)        ((void)0)  /* no op! */
-
 /* extra error code for `luaL_load' */
 #define LUA_ERRFILE     (LUA_ERRERR+1)
 
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
 LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
                                    const char *const lst[]);
 
+/* pre-defined references */
+#define LUA_NOREF       (-2)
+#define LUA_REFNIL      (-1)
+
 LUALIB_API int (luaL_ref) (lua_State *L, int t);
 LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
 
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
 				   const char *name, const char *mode);
 LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
 				int level);
+LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup);
+LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname,
+				   int sizehint);
+LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname);
+LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname);
 
 
 /*
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
 
 #define luaL_opt(L,f,n,d)	(lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
 
+/* From Lua 5.2. */
+#define luaL_newlibtable(L, l) \
+	lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1)
+#define luaL_newlib(L, l)	(luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0))
+
 /*
 ** {======================================================
 ** Generic Buffer manipulation
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
 
 /* }====================================================== */
 
-
-/* compatibility with ref system */
-
-/* pre-defined references */
-#define LUA_NOREF       (-2)
-#define LUA_REFNIL      (-1)
-
-#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
-      (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
-
-#define lua_unref(L,ref)        luaL_unref(L, LUA_REGISTRYINDEX, (ref))
-
-#define lua_getref(L,ref)       lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
-
-
-#define luaL_reg	luaL_Reg
-
 #endif

+ 47 - 29
love/src/jni/LuaJIT-2.1/src/lib_aux.c

@@ -1,6 +1,6 @@
 /*
 ** Auxiliary library for the Lua/C API.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
 static int libsize(const luaL_Reg *l)
 {
   int size = 0;
-  for (; l->name; l++) size++;
+  for (; l && l->name; l++) size++;
   return size;
 }
 
+LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint)
+{  /* Leaves the table for modname on top of the stack. */
+  luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+  lua_getfield(L, -1, modname);  /* Get _LOADED[modname]. */
+  if (!lua_istable(L, -1)) {  /* Not found? */
+    lua_pop(L, 1);  /* Drop it and try the global (created if missing). */
+    if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL)
+      lj_err_callerv(L, LJ_ERR_BADMODN, modname);  /* Non-NULL means failure. */
+    lua_pushvalue(L, -1);
+    lua_setfield(L, -3, modname);  /* _LOADED[modname] = new table. */
+  }
+  lua_remove(L, -2);  /* Remove _LOADED table. */
+}
+
 LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
 			     const luaL_Reg *l, int nup)
 {
   lj_lib_checkfpu(L);
   if (libname) {
-    int size = libsize(l);
-    /* check whether lib already exists */
-    luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
-    lua_getfield(L, -1, libname);  /* get _LOADED[libname] */
-    if (!lua_istable(L, -1)) {  /* not found? */
-      lua_pop(L, 1);  /* remove previous result */
-      /* try global variable (and create one if it does not exist) */
-      if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
-	lj_err_callerv(L, LJ_ERR_BADMODN, libname);
-      lua_pushvalue(L, -1);
-      lua_setfield(L, -3, libname);  /* _LOADED[libname] = new table */
-    }
-    lua_remove(L, -2);  /* remove _LOADED table */
-    lua_insert(L, -(nup+1));  /* move library table to below upvalues */
+    luaL_pushmodule(L, libname, libsize(l));
+    lua_insert(L, -(nup + 1));  /* Move module table below upvalues. */
   }
-  for (; l->name; l++) {
-    int i;
-    for (i = 0; i < nup; i++)  /* copy upvalues to the top */
-      lua_pushvalue(L, -nup);
-    lua_pushcclosure(L, l->func, nup);
-    lua_setfield(L, -(nup+2), l->name);
-  }
-  lua_pop(L, nup);  /* remove upvalues */
+  if (l)
+    luaL_setfuncs(L, l, nup);
+  else
+    lua_pop(L, nup);  /* Remove upvalues. */
 }
 
 LUALIB_API void luaL_register(lua_State *L, const char *libname,
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname,
   luaL_openlib(L, libname, l, 0);
 }
 
+LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
+{  /* Registers each l[] entry in the table below the nup upvalues. */
+  luaL_checkstack(L, nup, "too many upvalues");  /* Room for the copies. */
+  for (; l->name; l++) {  /* NULL name ends the list; l must be non-NULL. */
+    int i;
+    for (i = 0; i < nup; i++)  /* Copy upvalues to the top. */
+      lua_pushvalue(L, -nup);
+    lua_pushcclosure(L, l->func, nup);  /* Closure consumes the copies. */
+    lua_setfield(L, -(nup + 2), l->name);  /* Table is below the upvalues. */
+  }
+  lua_pop(L, nup);  /* Remove upvalues. */
+}
+
 LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
 				 const char *p, const char *r)
 {
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
 
 LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
 {
-  while (l--)
-    luaL_addchar(B, *s++);
+  if (l <= bufffree(B)) {  /* Fits in the free space: one bulk copy. */
+    memcpy(B->p, s, l);
+    B->p += l;
+  } else {  /* Too large: push s as a separate Lua string instead. */
+    emptybuffer(B);  /* NOTE(review): presumably flushes buffered bytes first. */
+    lua_pushlstring(B->L, s, l);
+    B->lvl++;  /* Account for the string just pushed. */
+    adjuststack(B);  /* NOTE(review): helper defined outside this hunk. */
+  }
 }
 
 LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
@@ -302,7 +320,7 @@ static int panic(lua_State *L)
 
 #ifdef LUAJIT_USE_SYSMALLOC
 
-#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
+#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
 #error "Must use builtin allocator for 64 bit target"
 #endif
 
@@ -334,7 +352,7 @@ LUALIB_API lua_State *luaL_newstate(void)
   lua_State *L;
   void *ud = lj_alloc_create();
   if (ud == NULL) return NULL;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   L = lj_state_newstate(lj_alloc_f, ud);
 #else
   L = lua_newstate(lj_alloc_f, ud);
@@ -343,7 +361,7 @@ LUALIB_API lua_State *luaL_newstate(void)
   return L;
 }
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
 {
   UNUSED(f); UNUSED(ud);

+ 38 - 22
love/src/jni/LuaJIT-2.1/src/lib_base.c

@@ -1,6 +1,6 @@
 /*
 ** Base and coroutine library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -23,6 +23,7 @@
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_state.h"
+#include "lj_frame.h"
 #if LJ_HASFFI
 #include "lj_ctype.h"
 #include "lj_cconv.h"
@@ -41,13 +42,13 @@
 
 LJLIB_ASM(assert)		LJLIB_REC(.)
 {
-  GCstr *s;
   lj_lib_checkany(L, 1);
-  s = lj_lib_optstr(L, 2);
-  if (s)
-    lj_err_callermsg(L, strdata(s));
-  else
+  if (L->top == L->base+1)  /* No message argument given. */
     lj_err_caller(L, LJ_ERR_ASSERT);
+  else if (tvisstr(L->base+1) || tvisnumber(L->base+1))  /* String/number. */
+    lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
+  else  /* Any other message type. */
+    lj_err_run(L);  /* NOTE(review): presumably raises the value as-is. */
   return FFH_UNREACHABLE;
 }
@@ -286,18 +287,27 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   } else {
     const char *p = strdata(lj_lib_checkstr(L, 1));
     char *ep;
+    unsigned int neg = 0;
     unsigned long ul;
     if (base < 2 || base > 36)
       lj_err_arg(L, 2, LJ_ERR_BASERNG);
-    ul = strtoul(p, &ep, base);
-    if (p != ep) {
-      while (lj_char_isspace((unsigned char)(*ep))) ep++;
-      if (*ep == '\0') {
-	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-	  setintV(L->base-1-LJ_FR2, (int32_t)ul);
-	else
-	  setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
-	return FFH_RES(1);
+    while (lj_char_isspace((unsigned char)(*p))) p++;
+    if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
+    if (lj_char_isalnum((unsigned char)(*p))) {
+      ul = strtoul(p, &ep, base);
+      if (p != ep) {
+	while (lj_char_isspace((unsigned char)(*ep))) ep++;
+	if (*ep == '\0') {
+	  if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
+	    if (neg) ul = -ul;
+	    setintV(L->base-1-LJ_FR2, (int32_t)ul);
+	  } else {
+	    lua_Number n = (lua_Number)ul;
+	    if (neg) n = -n;
+	    setnumV(L->base-1-LJ_FR2, n);
+	  }
+	  return FFH_RES(1);
+	}
       }
     }
   }
@@ -345,7 +355,7 @@ LJLIB_ASM_(xpcall)		LJLIB_REC(.)
 
 static int load_aux(lua_State *L, int status, int envarg)
 {
-  if (status == 0) {
+  if (status == LUA_OK) {
     if (tvistab(L->base+envarg-1)) {
       GCfunc *fn = funcV(L->top-1);
       GCtab *t = tabV(L->base+envarg-1);
@@ -418,7 +428,7 @@ LJLIB_CF(dofile)
   GCstr *fname = lj_lib_optstr(L, 1);
   setnilV(L->top);
   L->top = L->base+1;
-  if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0)
+  if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK)
     lua_error(L);
   lua_call(L, 0, LUA_MULTRET);
   return (int)(L->top - L->base) - 1;
@@ -492,14 +502,14 @@ LJLIB_CF(print)
     lua_gettable(L, LUA_GLOBALSINDEX);
     tv = L->top-1;
   }
-  shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
+  shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring)
+              && !gcrefu(basemt_it(G(L), LJ_TNUMX));
   for (i = 0; i < nargs; i++) {
     cTValue *o = &L->base[i];
-    char buf[STRFMT_MAXBUF_NUM];
     const char *str;
     size_t size;
     MSize len;
-    if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
+    if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) {
       size = len;
     } else {
       copyTV(L, L->top+1, o);
@@ -537,7 +547,7 @@ LJLIB_CF(coroutine_status)
   co = threadV(L->base);
   if (co == L) s = "running";
   else if (co->status == LUA_YIELD) s = "suspended";
-  else if (co->status != 0) s = "dead";
+  else if (co->status != LUA_OK) s = "dead";
   else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
   else if (co->top == co->base) s = "dead";
   else s = "suspended";
@@ -558,6 +568,12 @@ LJLIB_CF(coroutine_running)
 #endif
 }
 
+LJLIB_CF(coroutine_isyieldable)
+{
+  setboolV(L->top++, cframe_canyield(L->cframe));  /* Push the flag. */
+  return 1;  /* One result. */
+}
+
 LJLIB_CF(coroutine_create)
 {
   lua_State *L1;
@@ -577,7 +593,7 @@ LJLIB_ASM(coroutine_yield)
 static int ffh_resume(lua_State *L, lua_State *co, int wrap)
 {
   if (co->cframe != NULL || co->status > LUA_YIELD ||
-      (co->status == 0 && co->top == co->base)) {
+      (co->status == LUA_OK && co->top == co->base)) {
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     if (wrap) lj_err_caller(L, em);
     setboolV(L->base-1-LJ_FR2, 0);

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lib_bit.c

@@ -1,6 +1,6 @@
 /*
 ** Bit manipulation library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_bit_c

+ 5 - 5
love/src/jni/LuaJIT-2.1/src/lib_debug.c

@@ -1,6 +1,6 @@
 /*
 ** Debug library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
 
 /* ------------------------------------------------------------------------ */
 
-static const char KEY_HOOK = 'h';
+#define KEY_HOOK	((void *)0x3004)
 
 static void hookf(lua_State *L, lua_Debug *ar)
 {
   static const char *const hooknames[] =
     {"call", "return", "line", "count", "tail return"};
-  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+  lua_pushlightuserdata(L, KEY_HOOK);
   lua_rawget(L, LUA_REGISTRYINDEX);
   if (lua_isfunction(L, -1)) {
     lua_pushstring(L, hooknames[(int)ar->event]);
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
     count = luaL_optint(L, arg+3, 0);
     func = hookf; mask = makemask(smask, count);
   }
-  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+  lua_pushlightuserdata(L, KEY_HOOK);
   lua_pushvalue(L, arg+1);
   lua_rawset(L, LUA_REGISTRYINDEX);
   lua_sethook(L, func, mask, count);
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
   if (hook != NULL && hook != hookf) {  /* external hook? */
     lua_pushliteral(L, "external hook");
   } else {
-    lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+    lua_pushlightuserdata(L, KEY_HOOK);
     lua_rawget(L, LUA_REGISTRYINDEX);   /* get hook */
   }
   lua_pushstring(L, unmakemask(mask, buff));

+ 22 - 25
love/src/jni/LuaJIT-2.1/src/lib_ffi.c

@@ -1,6 +1,6 @@
 /*
 ** FFI library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_ffi_c
@@ -194,7 +194,7 @@ LJLIB_CF(ffi_meta___eq)		LJLIB_REC(cdata_arith MM_eq)
 
 LJLIB_CF(ffi_meta___len)	LJLIB_REC(cdata_arith MM_len)
 {
-  return ffi_arith(L);
+  return lj_carith_len(L);
 }
 
 LJLIB_CF(ffi_meta___lt)		LJLIB_REC(cdata_arith MM_lt)
@@ -505,10 +505,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
   }
   if (sz == CTSIZE_INVALID)
     lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
-  if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
-    cd = lj_cdata_new(cts, id, sz);
-  else
-    cd = lj_cdata_newv(L, id, sz, ctype_align(info));
+  cd = lj_cdata_newx(cts, id, sz, info);
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
@@ -723,47 +720,47 @@ LJLIB_CF(ffi_fill)	LJLIB_REC(.)
   return 0;
 }
 
-#define H_(le, be)	LJ_ENDIAN_SELECT(0x##le, 0x##be)
-
 /* Test ABI string. */
 LJLIB_CF(ffi_abi)	LJLIB_REC(.)
 {
   GCstr *s = lj_lib_checkstr(L, 1);
-  int b = 0;
-  switch (s->hash) {
+  int b = lj_cparse_case(s,  /* Each name is prefixed by its length byte. */
 #if LJ_64
-  case H_(849858eb,ad35fd06): b = 1; break;  /* 64bit */
+    "\00564bit"
 #else
-  case H_(662d3c79,d0e22477): b = 1; break;  /* 32bit */
+    "\00532bit"
 #endif
 #if LJ_ARCH_HASFPU
-  case H_(e33ee463,e33ee463): b = 1; break;  /* fpu */
+    "\003fpu"
 #endif
 #if LJ_ABI_SOFTFP
-  case H_(61211a23,c2e8c81c): b = 1; break;  /* softfp */
+    "\006softfp"
 #else
-  case H_(539417a8,8ce0812f): b = 1; break;  /* hardfp */
+    "\006hardfp"
 #endif
 #if LJ_ABI_EABI
-  case H_(2182df8f,f2ed1152): b = 1; break;  /* eabi */
+    "\004eabi"
 #endif
 #if LJ_ABI_WIN
-  case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
+    "\003win"
+#endif
+#if LJ_TARGET_UWP
+    "\003uwp"
+#endif
+#if LJ_LE
+    "\002le"
+#else
+    "\002be"
 #endif
-  case H_(3af93066,1f001464): b = 1; break;  /* le/be */
 #if LJ_GC64
-  case H_(9e89d2c9,13c83c92): b = 1; break;  /* gc64 */
+    "\004gc64"
 #endif
-  default:
-    break;
-  }
+  ) >= 0;  /* A non-negative result counts as a match. */
   setboolV(L->top-1, b);
   setboolV(&G(L)->tmptv2, b);  /* Remember for trace recorder. */
   return 1;
 }
 
-#undef H_
-
 LJLIB_PUSH(top-8) LJLIB_SET(!)  /* Store reference to miscmap table. */
 
 LJLIB_CF(ffi_metatype)
@@ -832,7 +829,7 @@ static GCtab *ffi_finalizer(lua_State *L)
   settabV(L, L->top++, t);
   setgcref(t->metatable, obj2gco(t));
   setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
-	  lj_str_newlit(L, "K"));
+	  lj_str_newlit(L, "k"));
   t->nomm = (uint8_t)(~(1u<<MM_mode));
   return t;
 }

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lib_init.c

@@ -1,6 +1,6 @@
 /*
 ** Library initialization.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major parts taken verbatim from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

+ 9 - 11
love/src/jni/LuaJIT-2.1/src/lib_io.c

@@ -1,6 +1,6 @@
 /*
 ** I/O library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -99,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
     int stat = -1;
 #if LJ_TARGET_POSIX
     stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
     stat = _pclose(iof->fp);
 #else
     lua_assert(0);
@@ -203,13 +203,12 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
     for (n = start; nargs-- && ok; n++) {
       if (tvisstr(L->base+n)) {
 	const char *p = strVdata(L->base+n);
-	if (p[0] != '*')
-	  lj_err_arg(L, n+1, LJ_ERR_INVOPT);
-	if (p[1] == 'n')
+	if (p[0] == '*') p++;
+	if (p[0] == 'n')
 	  ok = io_file_readnum(L, fp);
-	else if ((p[1] & ~0x20) == 'L')
-	  ok = io_file_readline(L, fp, (p[1] == 'l'));
-	else if (p[1] == 'a')
+	else if ((p[0] & ~0x20) == 'L')
+	  ok = io_file_readline(L, fp, (p[0] == 'l'));
+	else if (p[0] == 'a')
 	  io_file_readall(L, fp);
 	else
 	  lj_err_arg(L, n+1, LJ_ERR_INVFMT);
@@ -232,9 +231,8 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
   cTValue *tv;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
-    char buf[STRFMT_MAXBUF_NUM];
     MSize len;
-    const char *p = lj_strfmt_wstrnum(buf, tv, &len);
+    const char *p = lj_strfmt_wstrnum(L, tv, &len);
     if (!p)
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
     status = status && (fwrite(p, 1, len, fp) == len);
@@ -408,7 +406,7 @@ LJLIB_CF(io_open)
 
 LJLIB_CF(io_popen)
 {
-#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   GCstr *s = lj_lib_optstr(L, 2);
   const char *mode = s ? strdata(s) : "r";

+ 14 - 4
love/src/jni/LuaJIT-2.1/src/lib_jit.c

@@ -1,6 +1,6 @@
 /*
 ** JIT library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_jit_c
@@ -204,6 +204,7 @@ LJLIB_CF(jit_util_funcinfo)
     lua_setfield(L, -2, "source");
     lj_debug_pushloc(L, pt, pc);
     lua_setfield(L, -2, "loc");
+    setprotoV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "proto")), pt);
   } else {
     GCfunc *fn = funcV(L->base);
     GCtab *t;
@@ -668,6 +669,11 @@ static uint32_t jit_cpudetect(lua_State *L)
       if (fam >= 0x00000f00)  /* K8, K10. */
 	flags |= JIT_F_PREFER_IMUL;
     }
+    if (vendor[0] >= 7) {
+      uint32_t xfeatures[4];
+      lj_vm_cpuid(7, xfeatures);
+      flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
+    }
 #endif
   }
   /* Check for required instruction set support on x86 (unnecessary on x64). */
@@ -710,15 +716,19 @@ static uint32_t jit_cpudetect(lua_State *L)
 #if LJ_HASJIT
   /* Compile-time MIPS CPU detection. */
 #if LJ_ARCH_VERSION >= 20
-  flags |= JIT_F_MIPS32R2;
+  flags |= JIT_F_MIPSXXR2;
 #endif
   /* Runtime MIPS CPU detection. */
 #if defined(__GNUC__)
-  if (!(flags & JIT_F_MIPS32R2)) {
+  if (!(flags & JIT_F_MIPSXXR2)) {
     int x;
+#ifdef __mips16
+    x = 0;  /* Runtime detection is difficult. Ensure optimal -march flags. */
+#else
     /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
     __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
-    if (x) flags |= JIT_F_MIPS32R2;  /* Either 0x80000000 (R2) or 0 (R1). */
+#endif
+    if (x) flags |= JIT_F_MIPSXXR2;  /* Either 0x80000000 (R2) or 0 (R1). */
   }
 #endif
 #endif

+ 1 - 5
love/src/jni/LuaJIT-2.1/src/lib_math.c

@@ -1,6 +1,6 @@
 /*
 ** Math library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <math.h>
@@ -221,10 +221,6 @@ LUALIB_API int luaopen_math(lua_State *L)
   rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
   rs->valid = 0;  /* Use lazy initialization to save some time on startup. */
   LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
-#if defined(LUA_COMPAT_MOD) && !LJ_52
-  lua_getfield(L, -1, "fmod");
-  lua_setfield(L, -2, "mod");
-#endif
   return 1;
 }
 

+ 3 - 3
love/src/jni/LuaJIT-2.1/src/lib_os.c

@@ -1,6 +1,6 @@
 /*
 ** OS library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -205,12 +205,12 @@ LJLIB_CF(os_date)
     setboolfield(L, "isdst", stm->tm_isdst);
   } else if (*s) {
     SBuf *sb = &G(L)->tmpbuf;
-    MSize sz = 0;
+    MSize sz = 0, retry = 4;
     const char *q;
     for (q = s; *q; q++)
       sz += (*q == '%') ? 30 : 1;  /* Overflow doesn't matter. */
     setsbufL(sb, L);
-    for (;;) {
+    while (retry--) {  /* Limit growth for invalid format or empty result. */
       char *buf = lj_buf_need(sb, sz);
       size_t len = strftime(buf, sbufsz(sb), s, stm);
       if (len) {

+ 37 - 24
love/src/jni/LuaJIT-2.1/src/lib_package.c

@@ -1,6 +1,6 @@
 /*
 ** Package library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
 BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
 #endif
 
+#if LJ_TARGET_UWP
+void *LJ_WIN_LOADLIBA(const char *path)
+{
+  DWORD err = GetLastError();
+  wchar_t wpath[256];
+  HANDLE lib = NULL;
+  if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
+    lib = LoadPackagedLibrary(wpath, 0);
+  }
+  SetLastError(err);
+  return lib;
+}
+#endif
+
 #undef setprogdir
 
 static void setprogdir(lua_State *L)
@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib)
 
 static void *ll_load(lua_State *L, const char *path, int gl)
 {
-  HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
+  HINSTANCE lib = LJ_WIN_LOADLIBA(path);
   if (lib == NULL) pusherror(L);
   UNUSED(gl);
   return lib;
@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
   return f;
 }
 
+#if LJ_TARGET_UWP
+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
+#endif
+
 static const char *ll_bcsym(void *lib, const char *sym)
 {
   if (lib) {
     return (const char *)GetProcAddress((HINSTANCE)lib, sym);
   } else {
+#if LJ_TARGET_UWP
+    return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
+#else
     HINSTANCE h = GetModuleHandleA(NULL);
     const char *p = (const char *)GetProcAddress(h, sym);
     if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
 					(const char *)ll_bcsym, &h))
       p = (const char *)GetProcAddress(h, sym);
     return p;
+#endif
   }
 }
 
@@ -193,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path)
     lua_pop(L, 1);
     plib = (void **)lua_newuserdata(L, sizeof(void *));
     *plib = NULL;
-    luaL_getmetatable(L, "_LOADLIB");
-    lua_setmetatable(L, -2);
+    luaL_setmetatable(L, "_LOADLIB");
     lua_pushfstring(L, "LOADLIB: %s", path);
     lua_pushvalue(L, -2);
     lua_settable(L, LUA_REGISTRYINDEX);
@@ -399,8 +420,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
 
 /* ------------------------------------------------------------------------ */
 
-static const int sentinel_ = 0;
-#define sentinel	((void *)&sentinel_)
+#define sentinel	((void *)0x4004)
 
 static int lj_cf_package_require(lua_State *L)
 {
@@ -490,29 +510,19 @@ static void modinit(lua_State *L, const char *modname)
 static int lj_cf_package_module(lua_State *L)
 {
   const char *modname = luaL_checkstring(L, 1);
-  int loaded = lua_gettop(L) + 1;  /* index of _LOADED table */
-  lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
-  lua_getfield(L, loaded, modname);  /* get _LOADED[modname] */
-  if (!lua_istable(L, -1)) {  /* not found? */
-    lua_pop(L, 1);  /* remove previous result */
-    /* try global variable (and create one if it does not exist) */
-    if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
-      lj_err_callerv(L, LJ_ERR_BADMODN, modname);
-    lua_pushvalue(L, -1);
-    lua_setfield(L, loaded, modname);  /* _LOADED[modname] = new table */
-  }
-  /* check whether table already has a _NAME field */
+  int lastarg = (int)(L->top - L->base);
+  luaL_pushmodule(L, modname, 1);
   lua_getfield(L, -1, "_NAME");
-  if (!lua_isnil(L, -1)) {  /* is table an initialized module? */
+  if (!lua_isnil(L, -1)) {  /* Module already initialized? */
     lua_pop(L, 1);
-  } else {  /* no; initialize it */
+  } else {
     lua_pop(L, 1);
     modinit(L, modname);
   }
   lua_pushvalue(L, -1);
   setfenv(L);
-  dooptions(L, loaded - 1);
-  return 0;
+  dooptions(L, lastarg);
+  return LJ_52;
 }
 
 static int lj_cf_package_seeall(lua_State *L)
@@ -583,13 +593,16 @@ LUALIB_API int luaopen_package(lua_State *L)
   lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
   lua_setfield(L, -2, "__gc");
   luaL_register(L, LUA_LOADLIBNAME, package_lib);
-  lua_pushvalue(L, -1);
-  lua_replace(L, LUA_ENVIRONINDEX);
+  lua_copy(L, -1, LUA_ENVIRONINDEX);
   lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
   for (i = 0; package_loaders[i] != NULL; i++) {
     lj_lib_pushcf(L, package_loaders[i], 1);
     lua_rawseti(L, -2, i+1);
   }
+#if LJ_52
+  lua_pushvalue(L, -1);
+  lua_setfield(L, -3, "searchers");
+#endif
   lua_setfield(L, -2, "loaders");
   lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
   noenv = lua_toboolean(L, -1);

+ 1 - 5
love/src/jni/LuaJIT-2.1/src/lib_string.c

@@ -1,6 +1,6 @@
 /*
 ** String library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -737,10 +737,6 @@ LUALIB_API int luaopen_string(lua_State *L)
   GCtab *mt;
   global_State *g;
   LJ_LIB_REG(L, LUA_STRLIBNAME, string);
-#if defined(LUA_COMPAT_GFIND) && !LJ_52
-  lua_getfield(L, -1, "gmatch");
-  lua_setfield(L, -2, "gfind");
-#endif
   mt = lj_tab_new(L, 0, 1);
   /* NOBARRIER: basemt is a GC root. */
   g = G(L);

+ 21 - 1
love/src/jni/LuaJIT-2.1/src/lib_table.c

@@ -1,6 +1,6 @@
 /*
 ** Table library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -129,6 +129,26 @@ LJLIB_LUA(table_remove) /*
   end
 */
 
+LJLIB_LUA(table_move) /*
+  function(a1, f, e, t, a2)
+    CHECK_tab(a1)
+    CHECK_int(f)
+    CHECK_int(e)
+    CHECK_int(t)
+    if a2 == nil then a2 = a1 end
+    CHECK_tab(a2)
+    if e >= f then
+      local d = t - f
+      if t > e or t <= f or a2 ~= a1 then
+	for i=f,e do a2[i+d] = a1[i] end
+      else
+	for i=e,f,-1 do a2[i+d] = a1[i] end
+      end
+    end
+    return a2
+  end
+*/
+
 LJLIB_CF(table_concat)		LJLIB_REC(.)
 {
   GCtab *t = lj_lib_checktab(L, 1);

+ 3 - 3
love/src/jni/LuaJIT-2.1/src/lj.supp

@@ -27,15 +27,15 @@
 {
    Optimized string compare
    Memcheck:Addr4
-   fun:lj_str_fastcmp
+   fun:str_fastcmp
 }
 {
    Optimized string compare
    Memcheck:Addr1
-   fun:lj_str_fastcmp
+   fun:str_fastcmp
 }
 {
    Optimized string compare
    Memcheck:Cond
-   fun:lj_str_fastcmp
+   fun:str_fastcmp
 }

+ 178 - 86
love/src/jni/LuaJIT-2.1/src/lj_alloc.c

@@ -72,13 +72,56 @@
 
 #define IS_DIRECT_BIT		(SIZE_T_ONE)
 
+
+/* Determine system-specific block allocation method. */
 #if LJ_TARGET_WINDOWS
 
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
+#define LJ_ALLOC_VIRTUALALLOC	1
+
 #if LJ_64 && !LJ_GC64
+#define LJ_ALLOC_NTAVM		1
+#endif
+
+#else
+
+#include <errno.h>
+/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
+#include <sys/mman.h>
+
+#define LJ_ALLOC_MMAP		1
+
+#if LJ_64
+
+#define LJ_ALLOC_MMAP_PROBE	1
+
+#if LJ_GC64
+#define LJ_ALLOC_MBITS		47	/* 128 TB in LJ_GC64 mode. */
+#elif LJ_TARGET_X64 && LJ_HASJIT
+/* Due to limitations in the x64 compiler backend. */
+#define LJ_ALLOC_MBITS		31	/* 2 GB on x64 with !LJ_GC64. */
+#else
+#define LJ_ALLOC_MBITS		32	/* 4 GB on other archs with !LJ_GC64. */
+#endif
+
+#endif
+
+#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
+#define LJ_ALLOC_MMAP32		1
+#endif
+
+#if LJ_TARGET_LINUX
+#define LJ_ALLOC_MREMAP		1
+#endif
+
+#endif
+
+
+#if LJ_ALLOC_VIRTUALALLOC
 
+#if LJ_ALLOC_NTAVM
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
 		       size_t *size, ULONG alloctype, ULONG prot);
@@ -89,14 +132,15 @@ static PNTAVM ntavm;
 */
 #define NTAVM_ZEROBITS		1
 
-static void INIT_MMAP(void)
+static void init_mmap(void)
 {
   ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
 				 "NtAllocateVirtualMemory");
 }
+#define INIT_MMAP()	init_mmap()
 
 /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *CALL_MMAP(size_t size)
 {
   DWORD olderr = GetLastError();
   void *ptr = NULL;
@@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
 }
 
 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+static void *DIRECT_MMAP(size_t size)
 {
   DWORD olderr = GetLastError();
   void *ptr = NULL;
@@ -119,23 +163,21 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
 
 #else
 
-#define INIT_MMAP()		((void)0)
-
 /* Win32 MMAP via VirtualAlloc */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *CALL_MMAP(size_t size)
 {
   DWORD olderr = GetLastError();
-  void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
   SetLastError(olderr);
   return ptr ? ptr : MFAIL;
 }
 
 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+static void *DIRECT_MMAP(size_t size)
 {
   DWORD olderr = GetLastError();
-  void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-			   PAGE_READWRITE);
+  void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+			    PAGE_READWRITE);
   SetLastError(olderr);
   return ptr ? ptr : MFAIL;
 }
@@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
 #endif
 
 /* This function supports releasing coalesced segments */
-static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+static int CALL_MUNMAP(void *ptr, size_t size)
 {
   DWORD olderr = GetLastError();
   MEMORY_BASIC_INFORMATION minfo;
@@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
   return 0;
 }
 
-#else
-
-#include <errno.h>
-#include <sys/mman.h>
+#elif LJ_ALLOC_MMAP
 
 #define MMAP_PROT		(PROT_READ|PROT_WRITE)
 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -174,107 +213,152 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
 #endif
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 
-#if LJ_64 && !LJ_GC64
-/* 64 bit mode with 32 bit pointers needs special support for allocating
-** memory in the lower 2GB.
-*/
-
-#if defined(MAP_32BIT)
+#if LJ_ALLOC_MMAP_PROBE
 
-#if defined(__sun__)
-#define MMAP_REGION_START	((uintptr_t)0x1000)
+#ifdef MAP_TRYFIXED
+#define MMAP_FLAGS_PROBE	(MMAP_FLAGS|MAP_TRYFIXED)
 #else
-/* Actually this only gives us max. 1GB in current Linux kernels. */
-#define MMAP_REGION_START	((uintptr_t)0)
+#define MMAP_FLAGS_PROBE	MMAP_FLAGS
 #endif
 
-static LJ_AINLINE void *CALL_MMAP(size_t size)
-{
-  int olderr = errno;
-  void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
-  errno = olderr;
-  return ptr;
-}
+#define LJ_ALLOC_MMAP_PROBE_MAX		30
+#define LJ_ALLOC_MMAP_PROBE_LINEAR	5
 
-#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__)
+#define LJ_ALLOC_MMAP_PROBE_LOWER	((uintptr_t)0x4000)
 
-/* OSX and FreeBSD mmap() use a naive first-fit linear search.
-** That's perfect for us. Except that -pagezero_size must be set for OSX,
-** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
-** to be reduced to 250MB on FreeBSD.
+/* No point in a giant ifdef mess. Just try to open /dev/urandom.
+** It doesn't really matter if this fails, since we get some ASLR bits from
+** every unsuitable allocation, too. And we prefer linear allocation, anyway.
 */
-#if LJ_TARGET_OSX || defined(__DragonFly__)
-#define MMAP_REGION_START	((uintptr_t)0x10000)
-#elif LJ_TARGET_PS4
-#define MMAP_REGION_START	((uintptr_t)0x4000)
-#else
-#define MMAP_REGION_START	((uintptr_t)0x10000000)
-#endif
-#define MMAP_REGION_END		((uintptr_t)0x80000000)
+#include <fcntl.h>
+#include <unistd.h>
 
-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-#include <sys/resource.h>
-#endif
+static uintptr_t mmap_probe_seed(void)
+{
+  uintptr_t val;
+  int fd = open("/dev/urandom", O_RDONLY);
+  if (fd != -1) {
+    int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
+    (void)close(fd);
+    if (ok) return val;
+  }
+  return 1;  /* Punt. */
+}
 
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *mmap_probe(size_t size)
 {
-  int olderr = errno;
   /* Hint for next allocation. Doesn't need to be thread-safe. */
-  static uintptr_t alloc_hint = MMAP_REGION_START;
-  int retry = 0;
-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-  static int rlimit_modified = 0;
-  if (LJ_UNLIKELY(rlimit_modified == 0)) {
-    struct rlimit rlim;
-    rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
-    setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail below. */
-    rlimit_modified = 1;
-  }
-#endif
-  for (;;) {
-    void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
-    if ((uintptr_t)p >= MMAP_REGION_START &&
-	(uintptr_t)p + size < MMAP_REGION_END) {
-      alloc_hint = (uintptr_t)p + size;
+  static uintptr_t hint_addr = 0;
+  static uintptr_t hint_prng = 0;
+  int olderr = errno;
+  int retry;
+  for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
+    void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
+    uintptr_t addr = (uintptr_t)p;
+    if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
+	((addr + size) >> LJ_ALLOC_MBITS) == 0) {
+      /* We got a suitable address. Bump the hint address. */
+      hint_addr = addr + size;
       errno = olderr;
       return p;
     }
-    if (p != CMFAIL) munmap(p, size);
-#if defined(__sun__) || defined(__DragonFly__)
-    alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
-    if (alloc_hint + size < MMAP_REGION_END) continue;
-#endif
-    if (retry) break;
-    retry = 1;
-    alloc_hint = MMAP_REGION_START;
+    if (p != MFAIL) {
+      munmap(p, size);
+    } else if (errno == ENOMEM) {
+      return MFAIL;
+    }
+    if (hint_addr) {
+      /* First, try linear probing. */
+      if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
+	hint_addr += 0x1000000;
+	if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
+	  hint_addr = 0;
+	continue;
+      } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
+	/* Next, try a no-hint probe to get back an ASLR address. */
+	hint_addr = 0;
+	continue;
+      }
+    }
+    /* Finally, try pseudo-random probing. */
+    if (LJ_UNLIKELY(hint_prng == 0)) {
+      hint_prng = mmap_probe_seed();
+    }
+    /* The unsuitable address we got has some ASLR PRNG bits. */
+    hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
+    do {  /* The PRNG itself is very weak, but see above. */
+      hint_prng = hint_prng * 1103515245 + 12345;
+      hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
+      hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
+    } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
   }
   errno = olderr;
-  return CMFAIL;
+  return MFAIL;
 }
 
+#endif
+
+#if LJ_ALLOC_MMAP32
+
+#if defined(__sun__)
+#define LJ_ALLOC_MMAP32_START	((uintptr_t)0x1000)
 #else
+#define LJ_ALLOC_MMAP32_START	((uintptr_t)0)
+#endif
 
-#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
+static void *mmap_map32(size_t size)
+{
+#if LJ_ALLOC_MMAP_PROBE
+  static int fallback = 0;
+  if (fallback)
+    return mmap_probe(size);
+#endif
+  {
+    int olderr = errno;
+    void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+    errno = olderr;
+    /* This only allows 1GB on Linux. So fall back to probing to get 2GB. */
+#if LJ_ALLOC_MMAP_PROBE
+    if (ptr == MFAIL) {
+      fallback = 1;
+      return mmap_probe(size);
+    }
+#endif
+    return ptr;
+  }
+}
 
 #endif
 
+#if LJ_ALLOC_MMAP32
+#define CALL_MMAP(size)		mmap_map32(size)
+#elif LJ_ALLOC_MMAP_PROBE
+#define CALL_MMAP(size)		mmap_probe(size)
 #else
-
-/* 32 bit mode and GC64 mode is easy. */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *CALL_MMAP(size_t size)
 {
   int olderr = errno;
   void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
   errno = olderr;
   return ptr;
 }
-
 #endif
 
-#define INIT_MMAP()		((void)0)
-#define DIRECT_MMAP(s)		CALL_MMAP(s)
+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+
+#include <sys/resource.h>
+
+static void init_mmap(void)
+{
+  struct rlimit rlim;
+  rlim.rlim_cur = rlim.rlim_max = 0x10000;
+  setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail later. */
+}
+#define INIT_MMAP()	init_mmap()
+
+#endif
 
-static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+static int CALL_MUNMAP(void *ptr, size_t size)
 {
   int olderr = errno;
   int ret = munmap(ptr, size);
@@ -282,10 +366,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
   return ret;
 }
 
-#if LJ_TARGET_LINUX
+#if LJ_ALLOC_MREMAP
 /* Need to define _GNU_SOURCE to get the mremap prototype. */
-static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
-				     int flags)
+static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
 {
   int olderr = errno;
   ptr = mremap(ptr, osz, nsz, flags);
@@ -305,6 +388,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 
 #endif
 
+
+#ifndef INIT_MMAP
+#define INIT_MMAP()		((void)0)
+#endif
+
+#ifndef DIRECT_MMAP
+#define DIRECT_MMAP(s)		CALL_MMAP(s)
+#endif
+
 #ifndef CALL_MREMAP
 #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
 #endif

+ 97 - 18
love/src/jni/LuaJIT-2.1/src/lj_api.c

@@ -1,6 +1,6 @@
 /*
 ** Public Lua/C API.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -112,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
   from->top = f;
 }
 
+LUA_API const lua_Number *lua_version(lua_State *L)
+{
+  static const lua_Number version = LUA_VERSION_NUM;
+  UNUSED(L);
+  return &version;
+}
+
 /* -- Stack manipulation -------------------------------------------------- */
 
 LUA_API int lua_gettop(lua_State *L)
@@ -152,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx)
   copyTV(L, p, L->top);
 }
 
-LUA_API void lua_replace(lua_State *L, int idx)
+static void copy_slot(lua_State *L, TValue *f, int idx)
 {
-  api_checknelems(L, 1);
   if (idx == LUA_GLOBALSINDEX) {
-    api_check(L, tvistab(L->top-1));
+    api_check(L, tvistab(f));
     /* NOBARRIER: A thread (i.e. L) is never black. */
-    setgcref(L->env, obj2gco(tabV(L->top-1)));
+    setgcref(L->env, obj2gco(tabV(f)));
   } else if (idx == LUA_ENVIRONINDEX) {
     GCfunc *fn = curr_func(L);
     if (fn->c.gct != ~LJ_TFUNC)
       lj_err_msg(L, LJ_ERR_NOENV);
-    api_check(L, tvistab(L->top-1));
-    setgcref(fn->c.env, obj2gco(tabV(L->top-1)));
-    lj_gc_barrier(L, fn, L->top-1);
+    api_check(L, tvistab(f));
+    setgcref(fn->c.env, obj2gco(tabV(f)));
+    lj_gc_barrier(L, fn, f);
   } else {
     TValue *o = index2adr(L, idx);
     api_checkvalidindex(L, o);
-    copyTV(L, o, L->top-1);
+    copyTV(L, o, f);
     if (idx < LUA_GLOBALSINDEX)  /* Need a barrier for upvalues. */
-      lj_gc_barrier(L, curr_func(L), L->top-1);
+      lj_gc_barrier(L, curr_func(L), f);
   }
+}
+
+LUA_API void lua_replace(lua_State *L, int idx)
+{
+  api_checknelems(L, 1);
+  copy_slot(L, L->top - 1, idx);
   L->top--;
 }
 
+LUA_API void lua_copy(lua_State *L, int fromidx, int toidx)
+{
+  copy_slot(L, index2adr(L, fromidx), toidx);
+}
+
 LUA_API void lua_pushvalue(lua_State *L, int idx)
 {
   copyTV(L, L->top, index2adr(L, idx));
@@ -325,6 +342,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
     return 0;
 }
 
+LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  if (LJ_LIKELY(tvisnumber(o))) {
+    if (ok) *ok = 1;
+    return numberVnum(o);
+  } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) {
+    if (ok) *ok = 1;
+    return numV(&tmp);
+  } else {
+    if (ok) *ok = 0;
+    return 0;
+  }
+}
+
 LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
 {
   cTValue *o = index2adr(L, idx);
@@ -362,7 +395,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
     if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
       return 0;
     if (tvisint(&tmp))
-      return (lua_Integer)intV(&tmp);
+      return intV(&tmp);
     n = numV(&tmp);
   }
 #if LJ_64
@@ -372,6 +405,35 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
 #endif
 }
 
+LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  lua_Number n;
+  if (LJ_LIKELY(tvisint(o))) {
+    if (ok) *ok = 1;
+    return intV(o);
+  } else if (LJ_LIKELY(tvisnum(o))) {
+    n = numV(o);
+  } else {
+    if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) {
+      if (ok) *ok = 0;
+      return 0;
+    }
+    if (tvisint(&tmp)) {
+      if (ok) *ok = 1;
+      return intV(&tmp);
+    }
+    n = numV(&tmp);
+  }
+  if (ok) *ok = 1;
+#if LJ_64
+  return (lua_Integer)n;
+#else
+  return lj_num2int(n);
+#endif
+}
+
 LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
 {
   cTValue *o = index2adr(L, idx);
@@ -858,7 +920,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
   lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
 }
 
-LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname)
 {
   cTValue *o = index2adr(L, idx);
   if (tvisudata(o)) {
@@ -867,8 +929,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
     if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
       return uddata(ud);
   }
-  lj_err_argtype(L, idx, tname);
-  return NULL;  /* unreachable */
+  return NULL;  /* value is not a userdata with a metatable */
+}
+
+LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+{
+  void *p = luaL_testudata(L, idx, tname);
+  if (!p) lj_err_argtype(L, idx, tname);
+  return p;
 }
 
 /* -- Object setters ------------------------------------------------------ */
@@ -977,6 +1045,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
   return 1;
 }
 
+LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
+{
+  lua_getfield(L, LUA_REGISTRYINDEX, tname);
+  lua_setmetatable(L, -2);
+}
+
 LUA_API int lua_setfenv(lua_State *L, int idx)
 {
   cTValue *o = index2adr(L, idx);
@@ -1032,7 +1106,7 @@ static TValue *api_call_base(lua_State *L, int nargs)
 
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 {
-  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
   lj_vm_call(L, api_call_base(L, nargs), nresults+1);
 }
@@ -1043,7 +1117,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
   uint8_t oldh = hook_save(g);
   ptrdiff_t ef;
   int status;
-  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
   if (errfunc == 0) {
     ef = 0;
@@ -1075,7 +1149,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
   global_State *g = G(L);
   uint8_t oldh = hook_save(g);
   int status;
-  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
   status = lj_vm_cpcall(L, func, ud, cpcall);
   if (status) hook_restore(g, oldh);
   return status;
@@ -1096,6 +1170,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 
 /* -- Coroutine yield and resume ------------------------------------------ */
 
+LUA_API int lua_isyieldable(lua_State *L)
+{
+  return cframe_canyield(L->cframe);
+}
+
 LUA_API int lua_yield(lua_State *L, int nresults)
 {
   void *cf = L->cframe;
@@ -1140,7 +1219,7 @@ LUA_API int lua_resume(lua_State *L, int nargs)
 {
   if (L->cframe == NULL && L->status <= LUA_YIELD)
     return lj_vm_resume(L,
-      L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
+      L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs,
       0, 0);
   L->top = L->base;
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));

+ 115 - 32
love/src/jni/LuaJIT-2.1/src/lj_arch.h

@@ -1,6 +1,6 @@
 /*
 ** Target architecture selection.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ARCH_H
@@ -25,6 +25,10 @@
 #define LUAJIT_ARCH_ppc		5
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_mips	6
+#define LUAJIT_ARCH_MIPS32	6
+#define LUAJIT_ARCH_mips32	6
+#define LUAJIT_ARCH_MIPS64	7
+#define LUAJIT_ARCH_mips64	7
 
 /* Target OS. */
 #define LUAJIT_OS_OTHER		0
@@ -47,8 +51,10 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
+#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
+#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS64
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
-#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
+#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS32
 #else
 #error "No support for this architecture (yet)"
 #endif
@@ -68,7 +74,10 @@
        defined(__NetBSD__) || defined(__OpenBSD__) || \
        defined(__DragonFly__)) && !defined(__ORBIS__)
 #define LUAJIT_OS	LUAJIT_OS_BSD
-#elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
+#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__)
+#define LUAJIT_OS	LUAJIT_OS_POSIX
+#elif defined(__CYGWIN__)
+#define LJ_TARGET_CYGWIN	1
 #define LUAJIT_OS	LUAJIT_OS_POSIX
 #else
 #define LUAJIT_OS	LUAJIT_OS_OTHER
@@ -126,6 +135,13 @@
 #define LJ_TARGET_GC64		1
 #endif
 
+#ifdef _UWP
+#define LJ_TARGET_UWP		1
+#if LUAJIT_TARGET == LUAJIT_ARCH_X64
+#define LJ_TARGET_GC64		1
+#endif
+#endif
+
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
@@ -137,7 +153,7 @@
 #define LJ_ARCH_NAME		"x86"
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || __CYGWIN__
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
 #define LJ_ABI_WIN		1
 #else
 #define LJ_ABI_WIN		0
@@ -155,7 +171,7 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || __CYGWIN__
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
 #define LJ_ABI_WIN		1
 #else
 #define LJ_ABI_WIN		0
@@ -168,7 +184,7 @@
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
-#ifdef LUAJIT_ENABLE_GC64
+#ifndef LUAJIT_DISABLE_GC64
 #define LJ_TARGET_GC64		1
 #endif
 
@@ -192,7 +208,7 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
 #define LJ_ARCH_VERSION		80
 #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
@@ -206,9 +222,14 @@
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
 
-#define LJ_ARCH_NAME		"arm64"
 #define LJ_ARCH_BITS		64
+#if defined(__AARCH64EB__)
+#define LJ_ARCH_NAME		"arm64be"
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#else
+#define LJ_ARCH_NAME		"arm64"
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
+#endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
 #define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
@@ -217,7 +238,6 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_TARGET_GC64		1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
-#define LJ_ARCH_NOJIT		1	/* NYI */
 
 #define LJ_ARCH_VERSION		80
 
@@ -241,6 +261,28 @@
 #else
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_NAME		"ppc"
+
+#if !defined(LJ_ARCH_HASFPU)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ARCH_HASFPU		0
+#else
+#define LJ_ARCH_HASFPU		1
+#endif
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ABI_SOFTFP		1
+#else
+#define LJ_ABI_SOFTFP		0
+#endif
+#endif
+#endif
+
+#if LJ_ABI_SOFTFP
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+#else
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 #endif
 
 #define LJ_TARGET_PPC		1
@@ -249,7 +291,6 @@
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 
 #if LJ_TARGET_CONSOLE
 #define LJ_ARCH_PPC32ON64	1
@@ -286,25 +327,57 @@
 #define LJ_ARCH_XENON		1
 #endif
 
-#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
+#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
 #define LJ_ARCH_NAME		"mipsel"
+#else
+#define LJ_ARCH_NAME		"mips64el"
+#endif
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #else
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
 #define LJ_ARCH_NAME		"mips"
+#else
+#define LJ_ARCH_NAME		"mips64"
+#endif
 #define LJ_ARCH_ENDIAN		LUAJIT_BE
 #endif
+
+#if !defined(LJ_ARCH_HASFPU)
+#ifdef __mips_soft_float
+#define LJ_ARCH_HASFPU		0
+#else
+#define LJ_ARCH_HASFPU		1
+#endif
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#ifdef __mips_soft_float
+#define LJ_ABI_SOFTFP		1
+#else
+#define LJ_ABI_SOFTFP		0
+#endif
+#endif
+
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
 #define LJ_ARCH_BITS		32
+#define LJ_TARGET_MIPS32	1
+#else
+#define LJ_ARCH_BITS		64
+#define LJ_TARGET_MIPS64	1
+#define LJ_TARGET_GC64		1
+#endif
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_EHRETREG	4
 #define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
-#if _MIPS_ARCH_MIPS32R2
+#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
 #define LJ_ARCH_VERSION		20
 #else
 #define LJ_ARCH_VERSION		10
@@ -334,7 +407,7 @@
 #endif
 #elif LJ_TARGET_ARM64
 #if __clang__
-#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
 #error "Need at least Clang 3.5 or newer"
 #endif
 #else
@@ -366,31 +439,23 @@
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #endif
 #elif LJ_TARGET_ARM64
-#if defined(__AARCH64EB__)
-#error "No support for big-endian ARM64"
-#endif
 #if defined(_ILP32)
 #error "No support for ILP32 model on ARM64"
 #endif
 #elif LJ_TARGET_PPC
-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-#error "No support for PowerPC CPUs without double-precision FPU"
-#endif
-#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
+#if !LJ_ARCH_PPC64 && (defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)))
 #error "No support for little-endian PPC32"
 #endif
-#if LJ_ARCH_PPC64
-#error "No support for PowerPC 64 bit mode (yet)"
-#endif
-#ifdef __NO_FPRS__
+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
 #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
 #endif
-#elif LJ_TARGET_MIPS
-#if defined(__mips_soft_float)
-#error "No support for MIPS CPUs without FPU"
+#elif LJ_TARGET_MIPS32
+#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
+#error "Only o32 ABI supported for MIPS32"
 #endif
-#if defined(_LP64)
-#error "No support for MIPS64"
+#elif LJ_TARGET_MIPS64
+#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
+#error "Only n64 ABI supported for MIPS64"
 #endif
 #endif
 #endif
@@ -431,7 +496,7 @@
 #endif
 
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
 #define LJ_HASJIT		0
 #else
 #define LJ_HASJIT		1
@@ -466,6 +531,7 @@
 #define LJ_ABI_SOFTFP		0
 #endif
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
+#define LJ_SOFTFP32		(LJ_SOFTFP && LJ_32)
 
 #if LJ_ARCH_ENDIAN == LUAJIT_BE
 #define LJ_LE			0
@@ -492,7 +558,7 @@
 #endif
 
 /* Various workarounds for embedded operating systems or weak C runtimes. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
+#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #endif
 #if defined(__symbian__) || LJ_TARGET_WINDOWS
@@ -502,10 +568,27 @@
 #define LJ_NO_SYSTEM		1
 #endif
 
+#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
+/* NYI: no support for compact unwind specification, yet. */
+#define LUAJIT_NO_UNWIND	1
+#endif
+
 #if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #endif
 
+#if LJ_TARGET_WINDOWS
+#if LJ_TARGET_UWP
+#define LJ_WIN_VALLOC	VirtualAllocFromApp
+#define LJ_WIN_VPROTECT	VirtualProtectFromApp
+extern void *LJ_WIN_LOADLIBA(const char *path);
+#else
+#define LJ_WIN_VALLOC	VirtualAlloc
+#define LJ_WIN_VPROTECT	VirtualProtect
+#define LJ_WIN_LOADLIBA(path)	LoadLibraryExA((path), NULL, 0)
+#endif
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1

+ 200 - 65
love/src/jni/LuaJIT-2.1/src/lj_asm.c

@@ -1,6 +1,6 @@
 /*
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_asm_c
@@ -22,7 +22,6 @@
 #include "lj_ircall.h"
 #include "lj_iropt.h"
 #include "lj_mcode.h"
-#include "lj_iropt.h"
 #include "lj_trace.h"
 #include "lj_snap.h"
 #include "lj_asm.h"
@@ -91,7 +90,7 @@ typedef struct ASMState {
   MCode *realign;	/* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
@@ -144,7 +143,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)	(as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -171,6 +170,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -322,7 +323,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     lua_assert(!rset_test(as->freeset, r));
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
@@ -332,9 +337,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat     $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -346,6 +351,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -518,7 +529,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -527,9 +538,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+	if (k == ra_krefk(as, ref))
+	  return r;
+      } else {
+	IRIns *ir = IR(ref);
+	if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+	    (ir->o == IR_KINT && k == ir->i) ||
+	    (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+	    ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+	     k == (intptr_t)ir_kptr(ir))
+#else
+	    (ir->o != IR_KINT64 && k == ir->i)
+#endif
+	   )
+	  return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
 	k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
@@ -549,7 +582,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -619,10 +652,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR.
+**
+** Emits an IR_RENAME instruction (type IRT_NIL) through the trace recorder
+** state as->J, passing ref and snapno as its two operands. The register
+** 'down' and SPS_NONE are then stored in the newly emitted instruction.
+**
+** The new instruction is addressed through as->J->cur.ir, i.e. the IR owned
+** by the recorder, using the reference returned by lj_ir_emit().
+*/
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));  /* Reference of the new RENAME. */
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
@@ -635,11 +678,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
@@ -689,16 +728,20 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-	cTValue *tv = ir_knum(ir);
 	/* FP remat needs a load except for +0. Still better than eviction. */
-	if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-	  emit_loadn(as, dest, tv);
+	if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+	  emit_loadk64(as, dest, ir);
 	  return;
 	}
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-	emit_loadu64(as, dest, ir_kint64(ir)->u64);
+	emit_loadk64(as, dest, ir);
 	return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+	emit_loadk64(as, dest, ir);
+	return;
+#endif
 #endif
       } else if (ir->o != IR_KPRI) {
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -941,7 +984,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
 	ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -973,7 +1016,11 @@ static uint32_t ir_khash(IRIns *ir)
   } else {
     lua_assert(irt_isgcv(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
@@ -1055,7 +1102,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir)
     }
   } else {
     Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
-    /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
     emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
     emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
   }
@@ -1071,7 +1118,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
   IRRef args[3];
   IRIns *irs;
-  int kchar = -1;
+  int kchar = -129;
   args[0] = ir->op1;  /* SBuf * */
   args[1] = ir->op2;  /* GCstr * */
   irs = IR(ir->op2);
@@ -1079,7 +1126,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
   if (irs->o == IR_KGC) {
     GCstr *s = ir_kstr(irs);
     if (s->len == 1) {  /* Optimize put of single-char string constant. */
-      kchar = strdata(s)[0];
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
       args[1] = ASMREF_TMP1;  /* int, truncated to char */
       ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
     }
@@ -1106,7 +1153,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
   asm_gencall(as, ci, args);
   if (args[1] == ASMREF_TMP1) {
     Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
-    if (kchar == -1)
+    if (kchar == -129)
       asm_tvptr(as, tmp, irs->op1);
     else
       ra_allockreg(as, kchar, tmp);
@@ -1261,7 +1308,7 @@ static void asm_call(ASMState *as, IRIns *ir)
   asm_gencall(as, ci, args);
 }
 
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
 static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
@@ -1472,12 +1519,7 @@ static void asm_phi_fixup(ASMState *as)
       irt_clearmark(ir->t);
       /* Left PHI gained a spill slot before the loop? */
       if (ra_hasspill(ir->s)) {
-	IRRef ren;
-	lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-	ren = tref_ref(lj_ir_emit(as->J));
-	as->ir = as->T->ir;  /* The IR may have been reallocated. */
-	IR(ren)->r = (uint8_t)r;
-	IR(ren)->s = SPS_NONE;
+	ra_addrename(as, r, lref, as->loopsnapno);
       }
     }
     rset_clear(work, r);
@@ -1552,6 +1594,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1609,16 +1653,24 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_ADD: asm_add(as, ir); break;
   case IR_SUB: asm_sub(as, ir); break;
   case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_div(as, ir); break;
   case IR_MOD: asm_mod(as, ir); break;
-  case IR_POW: asm_pow(as, ir); break;
   case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP32. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
   case IR_ABS: asm_abs(as, ir); break;
   case IR_ATAN2: asm_atan2(as, ir); break;
   case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
   case IR_MIN: asm_min(as, ir); break;
   case IR_MAX: asm_max(as, ir); break;
-  case IR_FPMATH: asm_fpmath(as, ir); break;
 
   /* Overflow-checking arithmetic ops. */
   case IR_ADDOV: asm_addov(as, ir); break;
@@ -1663,7 +1715,6 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_OBAR: asm_obar(as, ir); break;
 
   /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
   case IR_CONV: asm_conv(as, ir); break;
   case IR_TOSTR: asm_tostr(as, ir); break;
   case IR_STRTO: asm_strto(as, ir); break;
@@ -1881,7 +1932,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1901,16 +1952,20 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
 	pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
       mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
@@ -1925,6 +1980,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1950,17 +2010,23 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
@@ -2063,8 +2129,8 @@ static void asm_setup_regsp(ASMState *as)
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
       if ((ir+1)->o != IR_HIOP) break;
-      /* fallthrough */
 #endif
+    /* fallthrough */
     /* C calls evict all scratch regs and return results in RID_RET. */
     case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
@@ -2075,9 +2141,12 @@ static void asm_setup_regsp(ASMState *as)
 	if (ir->op2 != REF_NIL && as->evenspill < 4)
 	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
       }
+      /* fallthrough */
 #else
+      /* fallthrough */
     case IR_CNEW:
 #endif
+      /* fallthrough */
     case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
     case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
@@ -2098,6 +2167,7 @@ static void asm_setup_regsp(ASMState *as)
     case IR_LDEXP:
 #endif
 #endif
+      /* fallthrough */
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
 	if (inloop)
@@ -2109,7 +2179,7 @@ static void asm_setup_regsp(ASMState *as)
 	continue;
 #endif
       }
-      /* fallthrough for integer POW */
+      /* fallthrough */ /* for integer POW */
     case IR_DIV: case IR_MOD:
       if (!irt_isnum(ir->t)) {
 	ir->prev = REGSP_HINT(RID_RET);
@@ -2143,7 +2213,11 @@ static void asm_setup_regsp(ASMState *as)
 #endif
 #if LJ_TARGET_X86ORX64
     /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+	break;
+      /* fallthrough */
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
 	IR(ir->op2)->r = REGSP_HINT(RID_ECX);
 	if (inloop)
@@ -2189,14 +2263,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   ASMState *as = &as_;
   MCode *origtop;
 
+  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+  {
+    IRRef nins = T->nins;
+    IRIns *ir = &T->ir[nins-1];
+    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+      T->nins = nins;
+    }
+  }
+
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  J->cur.ir[J->cur.nins].o = IR_NOP;
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  J->cur.ir[as->orignins].o = IR_NOP;
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -2209,12 +2294,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  **    always have one spare slot in the IR (see above), which means we
+  **    have to redo the assembly for only ~2% of all traces.
+  **
+  **    Very, very rarely, this needs to be done repeatedly, since the
+  **    location of constants inside the IR (actually, reachability from
+  **    a global pointer) may affect register allocation and thus the
+  **    number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
@@ -2242,22 +2356,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lua_assert(J->curfinal->nk == T->nk);
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+	     (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();
@@ -2270,6 +2402,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+#if LJ_TARGET_MCODE_FIXUP
+  asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
   lj_mcode_sync(T->mcode, origtop);
 }
 

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lj_asm.h

@@ -1,6 +1,6 @@
 /*
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ASM_H

+ 44 - 49
love/src/jni/LuaJIT-2.1/src/lj_asm_arm.h

@@ -1,6 +1,6 @@
 /*
 ** ARM IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -426,7 +426,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-  int hiop = ((ir+1)->o == IR_HIOP);
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   if (hiop && ra_hasreg((ir+1)->r))
@@ -520,8 +520,6 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
 }
-#else
-#define asm_tobit(as, ir)	lua_assert(0)
 #endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
@@ -911,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 
 static void asm_uref(ASMState *as, IRIns *ir)
 {
-  /* NYI: Check that UREFO is still open and not aliasing a slot. */
   Reg dest = ra_dest(as, ir, RSET_GPR);
   if (irref_isk(ir->op1)) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
@@ -982,7 +979,7 @@ static ARMIns asm_fxloadins(IRIns *ir)
   case IRT_I16: return ARMI_LDRSH;
   case IRT_U16: return ARMI_LDRH;
   case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
-  case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;  /* fallthrough */
   default: return ARMI_LDR;
   }
 }
@@ -993,29 +990,33 @@ static ARMIns asm_fxstoreins(IRIns *ir)
   case IRT_I8: case IRT_U8: return ARMI_STRB;
   case IRT_I16: case IRT_U16: return ARMI_STRH;
   case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
-  case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;  /* fallthrough */
   default: return ARMI_STR;
   }
 }
 
 static void asm_fload(ASMState *as, IRIns *ir)
 {
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
-  ARMIns ai = asm_fxloadins(ir);
-  int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
-      return;
+  if (ir->op1 == REF_NIL) {
+    lua_assert(!ra_used(ir));  /* We can end up here if DCE is turned off. */
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    ARMIns ai = asm_fxloadins(ir);
+    int32_t ofs;
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+	emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+	return;
+      }
     }
+    ofs = field_ofs[ir->op2];
+    if ((ai & 0x04000000))
+      emit_lso(as, ai, dest, idx, ofs);
+    else
+      emit_lsox(as, ai, dest, idx, ofs);
   }
-  ofs = field_ofs[ir->op2];
-  if ((ai & 0x04000000))
-    emit_lso(as, ai, dest, idx, ofs);
-  else
-    emit_lsox(as, ai, dest, idx, ofs);
 }
 
 static void asm_fstore(ASMState *as, IRIns *ir)
@@ -1372,8 +1373,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   else
     asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
-#else
-#define asm_fpmath(as, ir)	lua_assert(0)
 #endif
 
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1413,14 +1412,29 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dn(as, ai^m, dest, left);
 }
 
-static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
+/* Try to drop cmp r, #0.
+**
+** Only acts when as->flagmcp == as->mcp. In that case as->flagmcp is always
+** cleared and the condition code is read from the top 4 bits of as->mcp[1]:
+** - cc <= CC_NE: skip over the instruction at as->mcp and set ARMI_S in ai.
+** - cc == CC_GE or CC_LT: same, but additionally rewrite the condition field
+**   of the following instruction to CC_PL or CC_MI, respectively.
+** - any other cc: keep the instruction and leave ai unchanged.
+** Presumably as->mcp points at the cmp and as->mcp[1] is the conditional
+** instruction consuming its flags -- TODO confirm against the emitters.
+**
+** Returns ai, with ARMI_S set if the compare could be dropped.
+*/
+static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai)
 {
-  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
+  if (as->flagmcp == as->mcp) {
+    uint32_t cc = (as->mcp[1] >> 28);  /* Condition field of next ins. */
     as->flagmcp = NULL;
-    as->mcp++;
-    ai |= ARMI_S;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);  /* XOR turns GE into PL. */
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);  /* XOR turns LT into MI. */
+      ai |= ARMI_S;
+    }  /* else: other conds don't work in general. */
   }
-  asm_intop(as, ir, ai);
+  return ai;
+}
+
+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  asm_intop(as, ir, asm_drop_cmp0(as, ai));  /* ai gains ARMI_S if a cmp r, #0 was dropped. */
 }
 
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
@@ -1492,13 +1506,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
 #define asm_subov(as, ir)	asm_sub(as, ir)
 #define asm_mulov(as, ir)	asm_mul(as, ir)
 
-#if LJ_SOFTFP
-#define asm_div(as, ir)		lua_assert(0)
-#define asm_pow(as, ir)		lua_assert(0)
-#define asm_abs(as, ir)		lua_assert(0)
-#define asm_atan2(as, ir)	lua_assert(0)
-#define asm_ldexp(as, ir)	lua_assert(0)
-#else
+#if !LJ_SOFTFP
 #define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
 #define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
 #define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
@@ -1521,20 +1529,7 @@ static void asm_neg(ASMState *as, IRIns *ir)
 
 static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
+  ai = asm_drop_cmp0(as, ai);
   if (ir->op2 == 0) {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);

+ 2043 - 0
love/src/jni/LuaJIT-2.1/src/lj_asm_arm64.h

@@ -0,0 +1,2043 @@
+/*
+** ARM64 IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+**
+** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+** Sponsored by Cisco Systems, Inc.
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint.
+** If ref has no register yet, the hint is recorded on it (unless it already
+** carries a hint or iscrossref() holds) and a register is allocated from
+** allow. The resulting register is passed to ra_noweak() and returned.
+*/
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate two source registers for three-operand instructions.
+** Returns both registers packed into one value: left in the low 8 bits,
+** right shifted up by 8. An operand that already has a register keeps it;
+** whichever operand is allocated second excludes the register of the
+** first from allow.
+*/
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {
+    ra_noweak(as, right);
+    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {  /* Right has a hint: allocate it first. */
+    right = ra_allocref(as, ir->op2, allow);
+    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_allocref(as, ir->op1, allow);
+    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Setup all needed exit stubs.
+** The stub group is written downwards from as->mctop: nexits branch
+** instructions preceded by three fixed instructions. as->mctop is then
+** lowered to the start of the group. asm_mclimit() is called first if the
+** group (plus MCLIM_REDZONE) would cross as->mclim.
+*/
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
+  /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+  for (i = nexits-1; (int32_t)i >= 0; i--)
+    *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
+  *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
+  mxp--;  /* Decrement first: the branch offset below is relative to mxp. */
+  *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
+  *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
+  as->mctop = mxp;
+}
+
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{  /* Address of the stub branch for exitno: as->mctop + 3 + exitno. */
+  /* Keep this in-sync with exitstub_trace_addr(). */
+  return as->mctop + exitno + 3;
+}
+
+/* Emit conditional branch to exit for guard.
+** The target is the exit stub of the current snapshot (as->snapno).
+** If the current position equals as->invmcp, the word at p is overwritten
+** with an unconditional branch to the exit, as->loopinv is set and a
+** branch with the inverted condition (cc^1) targeting p-1 is emitted.
+*/
+static void asm_guardcc(ASMState *as, A64CC cc)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = A64I_B | A64F_S26(target-p);
+    emit_cond_branch(as, cc^1, p-1);
+    return;
+  }
+  emit_cond_branch(as, cc, target);
+}
+
+/* Emit test and branch instruction to exit for guard.
+** Same structure as asm_guardcc(), but emits ai via emit_tnb() for bit
+** 'bit' of register r. In the as->invmcp case the instruction is emitted
+** with bit 24 toggled (presumably selecting the opposite test -- confirm
+** against the A64I_TBZ/TBNZ definitions) and targets p-1.
+*/
+static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = A64I_B | A64F_S26(target-p);
+    emit_tnb(as, ai^0x01000000u, r, bit, p-1);
+    return;
+  }
+  emit_tnb(as, ai, r, bit, target);
+}
+
+/* Emit compare and branch instruction to exit for guard.
+** Same structure as asm_guardcc(), but emits ai via emit_cnb() for
+** register r. In the as->invmcp case the instruction is emitted with bit
+** 24 toggled (presumably selecting the opposite test -- confirm against
+** the A64I_CBZ/CBNZ definitions) and targets p-1.
+*/
+static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = A64I_B | A64F_S26(target-p);
+    emit_cnb(as, ai^0x01000000u, r, p-1);
+    return;
+  }
+  emit_cnb(as, ai, r, target);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+/* Check whether ref is a constant representable as a 32 bit integer.
+** On success the value is stored in *k and 1 is returned. Returns 0 (and
+** leaves *k untouched) for non-constants and for 64 bit constants that
+** do not pass checki32().
+*/
+static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
+{
+  IRIns *irk;
+  if (!irref_isk(ref))
+    return 0;  /* Not a constant at all. */
+  irk = IR(ref);
+  if (irk->o == IR_KNULL || !irt_is64(irk->t)) {
+    *k = irk->i;  /* KNULL or 32 bit constant: use the inline value. */
+    return 1;
+  }
+  if (!checki32((int64_t)ir_k64(irk)->u64))
+    return 0;  /* 64 bit constant out of int32_t range. */
+  *k = (int32_t)ir_k64(irk)->u64;
+  return 1;
+}
+
+/* Scan the IR strictly between ref and the current instruction for an
+** instruction with opcode 'conflict'. Returns 1 if none is found. Returns 0
+** on a conflict, or without scanning if ref is further away than
+** CONFLICT_SEARCH_LIM.
+*/
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *irbase = as->ir;
+  IRRef cur = as->curins;
+  IRRef j;
+  if (cur > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  for (j = cur - 1; j > ref; j--) {
+    if (irbase[j].o == conflict)
+      return 0;  /* Conflict found. */
+  }
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays.
+** Returns sizeof(GCtab) when ref is a TNEW whose op1 is at most
+** LJ_MAX_COLOSIZE, neverfuse() is false and no NEWREF lies between ref
+** and the current instruction. Returns 0 otherwise.
+*/
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;
+}
+
+#define FUSE_REG	0x40000000
+
+/* Fuse array/hash/upvalue reference into register+offset operand.
+** Returns the base register and stores the second operand in *ofsp:
+** either an immediate offset accepted by emit_checkofs(ins, ofs), or
+** FUSE_REG or'ed with an index register (AREF with non-constant index).
+** If nothing can be fused, *ofsp is set to 0 and ref itself is allocated
+** from allow.
+*/
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+			  A64Ins ins)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;  /* Colocated: index off the table. */
+	  ofs += 8*IR(ir->op2)->i;
+	  if (emit_checkofs(ins, ofs)) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	} else {
+	  Reg base = ra_alloc1(as, ir->op1, allow);
+	  *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
+	  return base;
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (emit_checkofs(ins, ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+	int64_t ofs = glofs(as, &uv->tv);
+	if (emit_checkofs(ins, ofs)) {
+	  *ofsp = (int32_t)ofs;
+	  return RID_GL;  /* Address the upvalue relative to RID_GL. */
+	}
+      }
+    }
+  }
+  *ofsp = 0;
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse m operand into arithmetic/logic instructions.
+** Returns the operand bits to be combined with the instruction: a plain
+** register, an encoded immediate (emit_isk13() or emit_isk12(), selected
+** by the bits of ai), a register with a constant shift, or a register
+** with SXTW extension (optionally shifted). Falls back to allocating ref
+** into a register from allow.
+*/
+static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_hasreg(ir->r)) {
+    ra_noweak(as, ir->r);
+    return A64F_M(ir->r);
+  } else if (irref_isk(ref)) {
+    uint32_t m;
+    int64_t k = get_k64val(ir);
+    if ((ai & 0x1f000000) == 0x0a000000)
+      m = emit_isk13(k, irt_is64(ir->t));
+    else
+      m = emit_isk12(k);
+    if (m)
+      return m;
+  } else if (mayfuse(as, ref)) {
+    if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
+	(ir->o == IR_ADD && ir->op1 == ir->op2)) {
+      A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
+		    ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
+      int shift = ir->o == IR_ADD ? 1 :  /* x+x is treated as x << 1. */
+		    (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
+      IRIns *irl = IR(ir->op1);
+      if (sh == A64SH_LSL &&
+	  irl->o == IR_CONV &&
+	  irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
+	  shift <= 4 &&
+	  canfuse(as, irl)) {
+	Reg m = ra_alloc1(as, irl->op1, allow);
+	return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
+      } else {
+	Reg m = ra_alloc1(as, ir->op1, allow);
+	return A64F_M(m) | A64F_SH(sh, shift);
+      }
+    } else if (ir->o == IR_CONV &&
+	       ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
+      Reg m = ra_alloc1(as, ir->op1, allow);
+      return A64F_M(m) | A64F_EX(A64EX_SXTW);
+    }
+  }
+  return A64F_M(ra_allocref(as, ref, allow));
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand.
+** Emits the load/store ai for register rd. If ref has no register and
+** can be fused:
+** - ADD with a constant accepted by emit_checkofs() becomes base+offset.
+** - Any other ADD becomes a register-offset access; the left operand may
+**   additionally be fused as a scaled index and/or an SXTW extension.
+** - STRREF adds sizeof(GCstr) to the offset, or emits an extra ADD into
+**   rd when neither operand is a 32 bit constant.
+** Otherwise ref is used as a plain base register with offset 0.
+*/
+static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+			 RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  int32_t ofs = 0;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    if (ir->o == IR_ADD) {
+      if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
+	ref = ir->op1;
+      } else {
+	Reg rn, rm;
+	IRRef lref = ir->op1, rref = ir->op2;
+	IRIns *irl = IR(lref);
+	if (mayfuse(as, irl->op1)) {
+	  unsigned int shift = 4;  /* Never equals (ai >> 30), which is 0..3. */
+	  if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
+	    shift = (IR(irl->op2)->i & 63);
+	  } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
+	    shift = 1;
+	  }
+	  if ((ai >> 30) == shift) {
+	    lref = irl->op1;
+	    irl = IR(lref);
+	    ai |= A64I_LS_SH;
+	  }
+	}
+	if (irl->o == IR_CONV &&
+	    irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
+	    canfuse(as, irl)) {
+	  lref = irl->op1;
+	  ai |= A64I_LS_SXTWx;
+	} else {
+	  ai |= A64I_LS_LSLx;
+	}
+	rm = ra_alloc1(as, lref, allow);
+	rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
+	emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
+	return;
+      }
+    } else if (ir->o == IR_STRREF) {
+      if (asm_isk32(as, ir->op2, &ofs)) {
+	ref = ir->op1;
+      } else if (asm_isk32(as, ir->op1, &ofs)) {
+	ref = ir->op2;
+      } else {
+	Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
+	Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
+	Reg rn = ra_alloc1(as, refv, allow);
+	IRIns *irr = IR(refk);
+	uint32_t m;
+	if (irr+1 == ir && !ra_used(irr) &&
+	    irr->o == IR_ADD && irref_isk(irr->op2)) {
+	  ofs = sizeof(GCstr) + IR(irr->op2)->i;
+	  if (emit_checkofs(ai, ofs)) {
+	    Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
+	    m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
+	    goto skipopm;
+	  }
+	}
+	m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
+	ofs = sizeof(GCstr);
+      skipopm:
+	emit_lso(as, ai, rd, rd, ofs);  /* rd doubles as the address register. */
+	emit_dn(as, A64I_ADDx^m, rd, rn);
+	return;
+      }
+      ofs += sizeof(GCstr);
+      if (!emit_checkofs(ai, ofs)) {
+	Reg rn = ra_alloc1(as, ref, allow);
+	Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+	emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
+	return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_lso(as, ai, (rd & 31), base, ofs);
+}
+
+/* Fuse FP multiply-add/sub.
+** If exactly one side of ir can supply an unallocated, fusable MUL, a
+** four-operand instruction is emitted and 1 is returned: ai when the MUL
+** is the left operand, air when it is the right operand. Returns 0 if
+** nothing was fused (including the case op1 == op2).
+*/
+static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+       ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+       (rref = lref, ai = air, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);  /* rref: the addend. */
+    Reg left = ra_alloc2(as, irm,
+			 rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;  /* Unpack ra_alloc2() result. */
+    emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
+    return 1;
+  }
+  return 0;
+}
+
+/* Fuse BAND + BSHL/BSHR into UBFM.
+** Requires a constant mask (op2) and a fusable left operand that is a
+** BSHR/BSHL with constant shift count. The mask (shifted down by the
+** count for BSHL) must be a non-empty run of contiguous 1-bits starting
+** at bit 0. Returns 1 if UBFM was emitted, 0 otherwise.
+*/
+static int asm_fuseandshift(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1);
+  lua_assert(ir->o == IR_BAND);
+  if (canfuse(as, irl) && irref_isk(ir->op2)) {
+    uint64_t mask = get_k64val(IR(ir->op2));
+    if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
+      int32_t shmask = irt_is64(irl->t) ? 63 : 31;
+      int32_t shift = (IR(irl->op2)->i & shmask);
+      int32_t imms = shift;
+      if (irl->o == IR_BSHL) {
+	mask >>= shift;
+	shift = (shmask-shift+1) & shmask;
+	imms = 0;
+      }
+      if (mask && !((mask+1) & mask)) {  /* Contiguous 1-bits at the bottom. */
+	Reg dest = ra_dest(as, ir, RSET_GPR);
+	Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
+	A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
+	imms += 63 - emit_clz64(mask);
+	if (imms > shmask) imms = shmask;  /* Clamp to the operand width. */
+	emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
+	return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* Fuse BOR(BSHL, BSHR) into EXTR/ROR.
+** Both operands must be fusable shifts of opposite direction with
+** constant counts that add up to the operand width of ir (64 or 32).
+** Returns 1 if EXTR was emitted, 0 otherwise.
+*/
+static int asm_fuseorshift(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  lua_assert(ir->o == IR_BOR);
+  if (canfuse(as, irl) && canfuse(as, irr) &&
+      ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
+       (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
+    if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
+      IRRef lref = irl->op1, rref = irr->op1;
+      uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
+      if (irl->o == IR_BSHR) {  /* BSHR needs to be the right operand. */
+	uint32_t tmp2;
+	IRRef tmp1 = lref; lref = rref; rref = tmp1;
+	tmp2 = lshift; lshift = rshift; rshift = tmp2;
+      }
+      if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
+	A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
+	Reg dest = ra_dest(as, ir, RSET_GPR);
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
+	emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
+	return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function.
+** The call instruction is only emitted here if ci->func is non-NULL.
+** Arguments are assigned by type to consecutive FPR or GPR argument
+** registers; once a register class is exhausted, further arguments of
+** that class are stored to the stack in 8 byte slots. Zero refs in args[]
+** are skipped.
+*/
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_XNARGS(ci);
+  int32_t ofs = 0;
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) { /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+    if (ref) {
+      if (irt_isfp(ir->t)) {
+	if (fpr <= REGARG_LASTFPR) {
+	  lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
+	  ra_leftov(as, fpr, ref);
+	  fpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_FPR);
+	  emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
+	  ofs += 8;
+	}
+      } else {
+	if (gpr <= REGARG_LASTGPR) {
+	  lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_GPR);
+	  emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
+	  ofs += 8;
+	}
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs.
+** All of RSET_SCRATCH except the destination register of ir is evicted.
+** If the result is used: FP results are taken from RID_FPRET, or moved
+** over from RID_RET with FMOV when CCI_CASTU64 is set; all other results
+** are taken from RID_RET.
+*/
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r); /* Dest reg handled below. */
+  ra_evictset(as, drop); /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lua_assert(!irt_ispri(ir->t));
+    if (irt_isfp(ir->t)) {
+      if (ci->flags & CCI_CASTU64) {
+	Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
+	emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
+		dest, RID_RET);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+  UNUSED(ci);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{  /* Assemble CALLX: call to a constant address, or an indirect call
+** (BLR) through a register taken from X8 upwards, excluding RSET_FIXED.
+** For the indirect case ci.func is NULL, so asm_gencall() emits no call.
+*/
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(ir_k64(irf)->u64);
+  } else {  /* Need a non-argument register for indirect calls. */
+    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
+    emit_n(as, A64I_BLR, freg);
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot.
+** delta is 1+LJ_FR2 plus the A operand of the bytecode instruction
+** preceding pc. as->topslot is reduced by delta (clamped at 0) and base
+** is adjusted by -8*delta. The guard compares the value loaded from
+** base-8 against pc and exits if they differ.
+*/
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  /* Need to force a spill on REF_BASE now to update the stack slot. */
+  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, A64I_CMPx, RID_TMP,
+	  ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{  /* Guarded double to int32 conversion of FPR 'left'. Emits (listed
+** here in reverse of the emit calls, as code is generated backwards):
+** convert to int32 in dest, convert dest back to double in tmp, compare
+** tmp with left, exit on CC_NE.
+*/
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
+  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
+  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{  /* Assemble TOBIT: FADD op1 and op2 as doubles into a scratch FPR,
+** then move the result to the GPR dest with FMOV_R_S.
+*/
+  RegSet allow = RSET_FPR;
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
+  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{  /* Assemble CONV. Dispatches on the source type st (taken from op2)
+** and the result type of ir: FP to FP, integer to FP, FP to integer
+** (guarded only for number to int), 8/16 bit extension to 32 bit, and
+** conversions between 32 and 64 bit integers.
+*/
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lua_assert(irt_type(ir->t) != st);
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
+	      (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      A64Ins ai = irt_isfloat(ir->t) ?
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
+	 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
+	 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
+      emit_dn(as, ai, (dest & 31), left);
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      A64Ins ai = irt_is64(ir->t) ?
+	(st == IRT_NUM ?
+	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
+	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
+	(st == IRT_NUM ?
+	 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
+	 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
+      emit_dn(as, ai, dest, (left & 31));
+    }
+  } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, lref, RSET_GPR);
+    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
+		st == IRT_U8 ? A64I_UXTBw :
+		st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
+    lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+    emit_dn(as, ai, dest, left);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (irt_is64(ir->t)) {
+      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
+	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      } else {  /* 32 to 64 bit sign extension. */
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	emit_dn(as, A64I_SXTW, dest, left);
+      }
+    } else {
+      if (st64) {
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address.
+	*/
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	emit_dm(as, A64I_MOVw, dest, left);
+      } else {  /* 32/32 bit no-op (cast). */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      }
+    }
+  }
+}
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{  /* Assemble STRTO as a call to lj_strscan_num(str, TValue *n). The
+** output pointer passed in the second argument is SP+ofs, where ofs is
+** the spill slot of ir if it has one and 0 otherwise. The guard exits
+** when RID_RET is zero after the call.
+*/
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  Reg dest = 0, tmp;
+  int destused = ra_used(ir);
+  int32_t ofs = 0;
+  ra_evictset(as, RSET_SCRATCH);
+  if (destused) {
+    if (ra_hasspill(ir->s)) {
+      ofs = sps_scale(ir->s);
+      destused = 0;  /* Result is written straight into the spill slot. */
+      if (ra_hasreg(ir->r)) {
+	ra_free(as, ir->r);
+	ra_modified(as, ir->r);
+	emit_spload(as, ir, ir->r, ofs);
+      }
+    } else {
+      dest = ra_dest(as, ir, RSET_FPR);
+    }
+  }
+  if (destused)
+    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
+  asm_guardcnb(as, A64I_CBZ, RID_RET);
+  args[0] = ir->op1; /* GCstr *str */
+  args[1] = ASMREF_TMP1; /* TValue *n  */
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Store tagged value for ref at base+ofs.
+** Constants are converted with lj_ir_kvalue() and stored as one 64 bit
+** value. Otherwise the value register is combined with the type tag in
+** RID_TMP and stored from there: integers add a pre-shifted tag constant
+** to the zero-extended 32 bit value, other types add the tag shifted
+** left by 47.
+*/
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+{
+  RegSet allow = rset_exclude(RSET_GPR, base);
+  IRIns *ir = IR(ref);
+  lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+  if (irref_isk(ref)) {
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, ir);
+    emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
+  } else {
+    Reg src = ra_alloc1(as, ref, allow);
+    rset_clear(allow, src);
+    if (irt_isinteger(ir->t)) {
+      Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
+      emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
+    } else {
+      Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
+      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
+    }
+  }
+}
+
+/* Get pointer to TValue.
+** dest receives the address of one of: the number constant itself, the
+** spill slot of a non-constant number (a spill is forced), or g->tmptv
+** after the tagged value of ref has been stored there.
+*/
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    if (irref_isk(ref)) {
+      /* Use the number constant itself as a TValue. */
+      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+    } else {
+      /* Otherwise force a spill and use the spill slot. */
+      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+    }
+  } else {
+    /* Otherwise use g->tmptv to hold the TValue. */
+    asm_tvstore64(as, dest, 0, ref);
+    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
+  }
+}
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{  /* Assemble AREF: dest = array base + 8*index. A constant index is
+** folded into an immediate add if the offset is accepted by
+** emit_isk12(); for colocated arrays (see asm_fuseabase) the table
+** itself is used as base. Otherwise the index register is added with
+** UXTW extension and a shift of 3.
+*/
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
+    if (k) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_dn(as, A64I_ADDx^k, dest, base);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+**
+** merge selects how the lookup result is consumed: IR_NE turns the
+** "not found" path into a guard exit, IR_EQ turns the "found" path into
+** a guard exit, anything else yields the node (or niltv) in dest.
+**
+** Primitive keys (true/false) are compared as full 64 bit TValues built
+** from the key type kt. The low 32 bits of every primitive TValue are
+** all ones, so a 32 bit compare cannot tell different primitives apart.
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = 0, tmp = RID_TMP;
+  Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  int isk = irref_isk(ir->op2);
+  IRType1 kt = irkey->t;
+  uint32_t k = 0;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+  rset_clear(allow, tab);
+
+  if (!isk) {
+    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
+    rset_clear(allow, key);
+    if (!irt_isstr(kt)) {
+      tmp = ra_scratch(as, allow);
+      rset_clear(allow, tmp);
+    }
+  } else if (irt_isnum(kt)) {
+    int64_t val = (int64_t)ir_knum(irkey)->u64;
+    if (!(k = emit_isk12(val))) {
+      key = ra_allock(as, val, allow);
+      rset_clear(allow, key);
+    }
+  } else if (!irt_ispri(kt)) {
+    if (!(k = emit_isk12(irkey->i))) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+  }
+
+  /* Allocate constants early. */
+  if (irt_isnum(kt)) {
+    if (!isk) {
+      tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
+      ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
+      rset_clear(allow, tisnum);
+    }
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+      scr = ra_allock(as, kk, allow);
+    } else {
+      scr = ra_scratch(as, allow);
+    }
+    rset_clear(allow, scr);
+  } else {
+    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+    /* Full TValue of the primitive key: use the key type, not ir->t. */
+    type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
+    scr = ra_scratch(as, rset_clear(allow, type));
+    rset_clear(allow, scr);
+  }
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_AL);
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end. */
+  l_loop = --as->mcp;  /* Reserve a slot; the branch is patched in below. */
+  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
+  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_EQ);
+  else
+    emit_cond_branch(as, CC_EQ, l_end);
+
+  if (irt_isnum(kt)) {
+    if (isk) {
+      /* Assumes -0.0 is already canonicalized to +0.0. */
+      if (k)
+	emit_n(as, A64I_CMPx^k, tmp);
+      else
+	emit_nm(as, A64I_CMPx, key, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+    } else {
+      /* key/ftmp are FPRs: mask to 5 bits like all other FP operands. */
+      emit_nm(as, A64I_FCMPd, (key & 31), (ftmp & 31));
+      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
+      emit_cond_branch(as, CC_LO, l_next);
+      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
+    }
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      emit_nm(as, A64I_CMPx, scr, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+    } else {
+      emit_nm(as, A64I_CMPx, tmp, scr);
+      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
+    }
+  } else {
+    /* Compare all 64 bits: the type tag lives in the upper bits. */
+    emit_nm(as, A64I_CMPx, scr, type);
+    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
+  }
+
+  *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
+  if (!isk && irt_isaddr(kt)) {
+    type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
+    rset_clear(allow, type);
+  }
+  /* Load main position relative to tab->node into dest. */
+  khash = isk ? ir_khash(irkey) : 1;
+  if (khash == 0) {
+    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
+  } else {
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
+    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
+    if (isk) {
+      Reg tmphash = ra_allock(as, khash, allow);
+      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
+      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+    } else if (irt_isstr(kt)) {
+      /* Fetch of str->hash is cheaper than ra_allock. */
+      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
+      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
+      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
+      emit_dnm(as, A64I_EORw, dest, dest, tmp);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
+      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
+      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
+      if (irt_isnum(kt)) {
+	emit_dnm(as, A64I_ADDw, dest, dest, dest);
+	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
+	emit_dm(as, A64I_MOVw, tmp, dest);
+	emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
+      } else {
+	checkmclim(as);
+	emit_dm(as, A64I_MOVw, tmp, key);
+	emit_dnm(as, A64I_EORw, dest, dest,
+		 ra_allock(as, irt_toitype(kt) << 15, allow));
+	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
+	emit_dm(as, A64I_MOVx, dest, key);
+      }
+    }
+  }
+}
+
+/* Assemble HREFK: reference to a hash node at a constant slot, guarded by
+** a check that the node still holds the expected constant key.
+**
+** The key is loaded from node + kofs, where kofs is the node offset plus
+** offsetof(Node, key). If kofs cannot be encoded in the load, the node
+** address is first computed into dest and the key is loaded relative to
+** that. The encodability check must therefore be made on kofs (the offset
+** actually used by the load), not on the node offset alone.
+*/
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  int bigofs = !emit_checkofs(A64I_LDRx, kofs);
+  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg key, idx = node;
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  uint64_t k;
+  lua_assert(ofs % sizeof(Node) == 0);
+  if (bigofs) {
+    /* Address the key relative to dest, which will hold node+ofs. */
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
+  }
+  asm_guardcc(as, CC_NE);  /* Exit if the stored key differs from k. */
+  if (irt_ispri(irkey->t)) {
+    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+  } else if (irt_isnum(irkey->t)) {
+    k = ir_knum(irkey)->u64;
+  } else {
+    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
+  }
+  key = ra_scratch(as, allow);
+  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
+  emit_lso(as, A64I_LDRx, key, idx, kofs);
+  if (bigofs)
+    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{  /* Assemble an upvalue reference. For a constant function the value
+** pointer is loaded directly from the upvalue's v field. Otherwise the
+** upvalue is loaded from the function's uvptr[] slot (index op2 >> 8);
+** for IR_UREFC a guard compares the 'closed' byte with 1 (exit on
+** CC_NE) and dest becomes the address of uv->tv, else dest is loaded
+** from uv->v.
+*/
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, A64I_LDRx, dest, v);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      asm_guardcc(as, CC_NE);
+      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
+      emit_opk(as, A64I_ADDx, dest, uv,
+	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
+      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    } else {
+      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
+    }
+    emit_lso(as, A64I_LDRx, uv, func,
+	     (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{  /* Emits nothing; only asserts that the result of ir is unused. */
+  UNUSED(as); UNUSED(ir);
+  lua_assert(!ra_used(ir));
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{  /* Assemble STRREF: dest = op1 + op2 + sizeof(GCstr). A constant op2
+** is folded into one immediate add if accepted by emit_isk12();
+** otherwise two adds are emitted.
+*/
+  RegSet allow = RSET_GPR;
+  Reg dest = ra_dest(as, ir, allow);
+  Reg base = ra_alloc1(as, ir->op1, allow);
+  IRIns *irr = IR(ir->op2);
+  int32_t ofs = sizeof(GCstr);
+  uint32_t m;
+  rset_clear(allow, base);
+  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
+    emit_dn(as, A64I_ADDx^m, dest, base);
+  } else {
+    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
+    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
+  }
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+/* Pick the load instruction matching the result type of an FLOAD/XLOAD.
+** Signed 8/16 bit types use the byte/halfword load with A64I_LS_S toggled;
+** everything not listed is loaded as a 64 or 32 bit GPR value.
+*/
+static A64Ins asm_fxloadins(IRIns *ir)
+{
+  A64Ins ai;
+  switch (irt_type(ir->t)) {
+  case IRT_U8:    ai = A64I_LDRB; break;
+  case IRT_I8:    ai = A64I_LDRB ^ A64I_LS_S; break;
+  case IRT_U16:   ai = A64I_LDRH; break;
+  case IRT_I16:   ai = A64I_LDRH ^ A64I_LS_S; break;
+  case IRT_FLOAT: ai = A64I_LDRs; break;
+  case IRT_NUM:   ai = A64I_LDRd; break;
+  default:
+    if (irt_is64(ir->t))
+      ai = A64I_LDRx;
+    else
+      ai = A64I_LDRw;
+    break;
+  }
+  return ai;
+}
+
+/* Pick the store instruction matching the value type of an FSTORE/XSTORE.
+** Signedness is irrelevant for stores, so I8/U8 and I16/U16 share an opcode.
+*/
+static A64Ins asm_fxstoreins(IRIns *ir)
+{
+  A64Ins ai;
+  switch (irt_type(ir->t)) {
+  case IRT_U8:
+  case IRT_I8:
+    ai = A64I_STRB;
+    break;
+  case IRT_U16:
+  case IRT_I16:
+    ai = A64I_STRH;
+    break;
+  case IRT_FLOAT:
+    ai = A64I_STRs;
+    break;
+  case IRT_NUM:
+    ai = A64I_STRd;
+    break;
+  default:
+    if (irt_is64(ir->t))
+      ai = A64I_STRx;
+    else
+      ai = A64I_STRw;
+    break;
+  }
+  return ai;
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{  /* Field load: load the field selected by op2 from the object in op1. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx;
+  A64Ins ai = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op1 == REF_NIL) {  /* No object reference: address relative to RID_GL. */
+    idx = RID_GL;
+    ofs = (ir->op2 << 2) - GG_OFS(g);  /* op2 is scaled by 4 in this case. */
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);  /* Non-zero offset if the array part is colocated. */
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+	emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
+	return;
+      }
+    }
+    ofs = field_ofs[ir->op2];  /* Offset looked up per field id. */
+  }
+  emit_lso(as, ai, (dest & 31), idx, ofs);
+}
+
+/* Field store: store op2 into the field described by the FREF in op1.
+** Nothing is emitted when the store has been sunk (RID_SINK).
+*/
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  Reg val, obj;
+  IRIns *irfref;
+  if (ir->r == RID_SINK)
+    return;
+  val = ra_alloc1(as, ir->op2, RSET_GPR);
+  irfref = IR(ir->op1);
+  /* The object register must differ from the value register. */
+  obj = ra_alloc1(as, irfref->op1, rset_exclude(RSET_GPR, val));
+  {
+    int32_t fofs = field_ofs[irfref->op2];
+    emit_lso(as, asm_fxstoreins(ir), (val & 31), obj, fofs);
+  }
+}
+
+/* Load from a raw memory address (XLOAD). Unaligned loads are not
+** expected here (asserted). The address operand is fused by asm_fusexref().
+*/
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  RegSet destset = irt_isfp(ir->t) ? RSET_FPR : RSET_GPR;
+  Reg dest = ra_dest(as, ir, destset);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+}
+
+/* Store to a raw memory address (XSTORE). Nothing is emitted for a sunk
+** store. The address must not end up in the register holding the value.
+*/
+static void asm_xstore(ASMState *as, IRIns *ir)
+{
+  RegSet srcset;
+  Reg val;
+  if (ir->r == RID_SINK)
+    return;
+  srcset = irt_isfp(ir->t) ? RSET_FPR : RSET_GPR;
+  val = ra_alloc1(as, ir->op2, srcset);
+  asm_fusexref(as, asm_fxstoreins(ir), val, ir->op1,
+	       rset_exclude(RSET_GPR, val));
+}
+
+/* Load a tagged value from an array, hash or upvalue slot and check its type.
+**
+** The 64 bit slot is loaded into a GPR (tmp), the tag is checked with a
+** guard and the payload is then moved/masked into dest if the result is used.
+** Emit calls are listed in reverse execution order, so the load emitted at
+** the end of this function runs first.
+**
+** Register constraints: constants materialized for the type check live
+** across the load, so they must not share a register with the loaded value
+** (tmp), the type scratch register, the base (idx) or a fused index register.
+** All constant allocations therefore use 'gpr' after these have been removed.
+*/
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  Reg idx, tmp, type;
+  int32_t ofs = 0;
+  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+	     irt_isint(ir->t));
+  if (ra_used(ir)) {
+    Reg dest = ra_dest(as, ir, allow);
+    /* Numbers are loaded into a GPR first and then moved to the FPR dest. */
+    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
+    if (irt_isaddr(ir->t)) {
+      /* Strip the tag bits, keeping only the bits in LJ_GCVMASK. */
+      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
+    } else if (irt_isnum(ir->t)) {
+      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
+    } else if (irt_isint(ir->t)) {
+      /* 32 bit move of dest onto itself drops the upper (tag) half. */
+      emit_dm(as, A64I_MOVw, dest, dest);
+    }
+  } else {
+    tmp = ra_scratch(as, gpr);
+  }
+  type = ra_scratch(as, rset_clear(gpr, tmp));
+  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  /* Protect the address registers from the constant allocations below. */
+  rset_clear(gpr, idx);
+  if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
+  /* Always do the type check, even if the load result is unused. */
+  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
+  if (irt_type(ir->t) >= IRT_NUM) {
+    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+	    ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
+  } else if (irt_isaddr(ir->t)) {
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
+    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+  } else if (irt_isnil(ir->t)) {
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+  } else {
+    /* false/true: compare against the tag with all lower bits set. */
+    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+	    ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
+  }
+  if (ofs & FUSE_REG)
+    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+  else
+    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{  /* Store a tagged value to an array, hash or upvalue slot. Numbers are
+   ** stored directly from an FPR; other values are combined with their
+   ** type tag in a GPR first. Emit calls are in reverse execution order. */
+  if (ir->r != RID_SINK) {  /* Sunk stores emit nothing. */
+    RegSet allow = RSET_GPR;
+    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
+    int32_t ofs = 0;
+    if (irt_isnum(ir->t)) {
+      src = ra_alloc1(as, ir->op2, RSET_FPR);
+      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
+      if (ofs & FUSE_REG)  /* Register-indexed form: low 5 bits of ofs hold the index register. */
+	emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31));
+      else
+	emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
+    } else {
+      if (!irt_ispri(ir->t)) {  /* Value with payload: needs src plus a tag constant. */
+	src = ra_alloc1(as, ir->op2, allow);
+	rset_clear(allow, src);
+	if (irt_isinteger(ir->t))
+	  type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);  /* Tag already shifted into place. */
+	else
+	  type = ra_allock(as, irt_toitype(ir->t), allow);  /* Unshifted tag; shifted by the add below. */
+      } else {  /* Primitive: the whole slot value is a constant. */
+	tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
+      }
+      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
+			   A64I_STRx);
+      if (ofs & FUSE_REG)
+	emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+      else
+	emit_lso(as, A64I_STRx, tmp, idx, ofs);
+      if (ra_hasreg(src)) {  /* Build the tagged value in tmp (RID_TMP) before the store. */
+	if (irt_isinteger(ir->t)) {
+	  emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);  /* tmp = type + zero-extended 32 bit src. */
+	} else {
+	  emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);  /* tmp = src + (type << 47). */
+	}
+      }
+    }
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{  /* Stack slot load: load slot op1 relative to BASE, with the optional
+   ** type check and number<->integer conversion selected by the op2 flags.
+   ** Emit calls are listed in reverse execution order. */
+  int32_t ofs = 8*((int32_t)ir->op1-2);  /* Byte offset of the slot from BASE. */
+  IRType1 t = ir->t;  /* Local copy; rewritten below to the type that must be checked. */
+  Reg dest = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
+  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);  /* Guarded conversion from the FPR scratch. */
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);  /* Holds the unconverted value. */
+    lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
+    if (irt_isaddr(t)) {
+      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);  /* Mask with LJ_GCVMASK to drop the tag. */
+    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
+      if (irt_isint(t)) {
+	emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
+	/* If value is already loaded for type check, move it to FPR. */
+	if ((ir->op2 & IRSLOAD_TYPECHECK))
+	  emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
+	else
+	  dest = tmp;  /* Load straight into the FPR scratch instead. */
+	t.irt = IRT_NUM;  /* Check for original type. */
+      } else {
+	emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
+	dest = tmp;  /* The load below targets the GPR scratch. */
+	t.irt = IRT_INT;  /* Check for original type. */
+      }
+    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
+      emit_dm(as, A64I_MOVw, dest, dest);  /* 32 bit move drops the upper (tag) half. */
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+dotypecheck:
+  rset_clear(allow, base);
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    Reg tmp;
+    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
+      tmp = dest;  /* Check the value in place when dest is a GPR. */
+    } else {
+      tmp = ra_scratch(as, allow);
+      rset_clear(allow, tmp);
+    }
+    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
+      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);  /* Move the checked number into the FPR dest. */
+    /* Need type check, even if the load result is unused. */
+    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
+    if (irt_type(t) >= IRT_NUM) {
+      lua_assert(irt_isinteger(t) || irt_isnum(t));
+      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+	      ra_allock(as, LJ_TISNUM << 15, allow), tmp);
+    } else if (irt_isnil(t)) {
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+    } else if (irt_ispri(t)) {
+      emit_nm(as, A64I_CMPx,
+	      ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
+    } else {
+      Reg type = ra_scratch(as, allow);
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
+      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);  /* type = tmp >> 47 (arithmetic). */
+    }
+    emit_lso(as, A64I_LDRx, tmp, base, ofs);  /* Full 64 bit slot load for the check. */
+    return;
+  }
+  if (ra_hasreg(dest)) {  /* Unchecked load with a width matching the type. */
+    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
+	     (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
+	     ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
+  }
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{  /* Allocate a cdata object (CNEW/CNEWI) via a call, then initialize its
+   ** header inline. Emit calls are listed in reverse execution order. */
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[4];
+  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);  /* Only registers outside RSET_SCRATCH. */
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
+
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCcdata * */
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    int32_t ofs = sizeof(GCcdata);  /* Payload follows the header. */
+    Reg r = ra_alloc1(as, ir->op2, allow);
+    lua_assert(sz == 4 || sz == 8);
+    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;  /* This path does no inline header initialization. */
+  }
+
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  {
+    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);  /* Ids below 65536 are built in RID_X1 with MOVZ. */
+    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
+    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
+    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
+    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
+  }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+	       ra_releasetmp(as, ASMREF_TMP1));  /* Total size = payload + header. */
+}
+#else
+#define asm_cnew(as, ir)	((void)0)
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{  /* Table write barrier, emitted inline. Emit calls are listed in reverse
+   ** execution order: the LDRB of tab->marked at the bottom runs first. */
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
+		     rset_exclude(rset_exclude(RSET_GPR, tab), link));  /* Address of the global state. */
+  Reg mark = RID_TMP;
+  MCLabel l_end = emit_label(as);  /* Target for skipping the barrier. */
+  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));  /* tab->gclist = old grayagain. */
+  emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  emit_lso(as, A64I_STRx, tab, gr,
+	   (int32_t)offsetof(global_State, gc.grayagain));  /* grayagain = tab. */
+  emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);  /* Clear the black bit. */
+  emit_lso(as, A64I_LDRx, link, gr,
+	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_cond_branch(as, CC_EQ, l_end);  /* Black bit not set: skip everything above. */
+  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
+  emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{  /* Object write barrier for a closed upvalue (op1 must be a UREFC).
+   ** Calls lj_gc_barrieruv() unless one of the two mark bit tests skips it.
+   ** Emit calls are listed in reverse execution order. */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  RegSet allow = RSET_GPR;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);  /* Target for skipping the call. */
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
+  obj = IR(ir->op1)->r;  /* Register already assigned to the UREFC result. */
+  tmp = ra_scratch(as, rset_exclude(allow, obj));
+  emit_cond_branch(as, CC_EQ, l_end);  /* Skip if the LJ_GC_BLACK test on tmp is zero. */
+  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
+  emit_cond_branch(as, CC_EQ, l_end);  /* Skip if the LJ_GC_WHITES test on RID_TMP is zero. */
+  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  emit_lso(as, A64I_LDRB, tmp, obj,
+     (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));  /* obj points at uv->tv; step back to uv->marked. */
+  emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+/* Emit a two-operand FP instruction: dest = left <ai> right.
+** ra_alloc2() returns both operand registers packed into one value
+** (left in the low 8 bits, right above them).
+*/
+static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg pair = ra_alloc2(as, ir, RSET_FPR);
+  Reg lhs = (pair & 255);
+  Reg rhs = (pair >> 8);
+  emit_dnm(as, ai, (dest & 31), (lhs & 31), (rhs & 31));
+}
+
+/* Emit a one-operand FP instruction: dest = <ai> op1.
+** The operand is allocated with dest as the register hint.
+*/
+static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg src = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+  emit_dn(as, ai, (dest & 31), (src & 31));
+}
+
+/* FPMATH dispatcher. sqrt and the rounding ops up to IRFPM_TRUNC map to a
+** single instruction, exp2 may be joined into a pow call and everything
+** else becomes a call indexed relative to IRCALL_lj_vm_floor.
+*/
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
+  if (fpm == IRFPM_SQRT) {
+    asm_fpunary(as, ir, A64I_FSQRTd);
+    return;
+  }
+  if (fpm <= IRFPM_TRUNC) {
+    A64Ins ai;
+    if (fpm == IRFPM_FLOOR)
+      ai = A64I_FRINTMd;
+    else if (fpm == IRFPM_CEIL)
+      ai = A64I_FRINTPd;
+    else
+      ai = A64I_FRINTZd;
+    asm_fpunary(as, ir, ai);
+    return;
+  }
+  if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;  /* Already handled by the joined pow. */
+  asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+}
+
+/* Check for an operand produced by an op that is preferred on the right:
+** a shift in the range BSHL..BSAR, an ADD of a value to itself or a
+** sign-extending int to i64 conversion.
+*/
+static int asm_swapops_fusable(IRIns *ir)
+{
+  if (ir->o >= IR_BSHL && ir->o <= IR_BSAR)
+    return 1;
+  if (ir->o == IR_ADD && ir->op1 == ir->op2)
+    return 1;
+  return (ir->o == IR_CONV &&
+	  ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT));
+}
+
+/* Decide whether the operands of a commutative op should be swapped.
+** Returns 1 to swap, 0 to keep the order. Constants and fusable operands
+** belong on the right-hand side.
+*/
+static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
+{
+  if (irref_isk(rref))
+    return 0;  /* Don't swap constants to the left. */
+  if (irref_isk(lref))
+    return 1;  /* But swap constants to the right. */
+  if (asm_swapops_fusable(IR(rref)))
+    return 0;  /* Don't swap fusable operands to the left. */
+  /* Swap fusable operands to the right, otherwise don't swap. */
+  return asm_swapops_fusable(IR(lref));
+}
+
+static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Generic two-operand integer op: dest = left <ai> right, where the
+   ** right operand may be fused by asm_fuseopm(). */
+  IRRef lref = ir->op1, rref = ir->op2;
+  Reg left, dest = ra_dest(as, ir, RSET_GPR);
+  uint32_t m;
+  if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {  /* Never swap operands of a subtraction. */
+    IRRef tmp = lref; lref = rref; rref = tmp;
+  }
+  left = ra_hintalloc(as, lref, dest, RSET_GPR);
+  if (irt_is64(ir->t)) ai |= A64I_X;  /* Switch to the 64 bit form. */
+  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+  if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
+    asm_guardcc(as, CC_VS);
+    ai |= A64I_S;  /* The guard needs the flag-setting form. */
+  }
+  emit_dn(as, ai^m, dest, left);
+}
+
+/* Integer op which may absorb a following compare with zero: if the last
+** emitted instruction is the one remembered in as->flagmcp, it is dropped
+** and the flag-setting form of the op is used instead.
+*/
+static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
+{
+  A64Ins op = ai;
+  if (as->mcp == as->flagmcp) {  /* Drop cmp r, #0. */
+    as->mcp++;
+    as->flagmcp = NULL;
+    op |= A64I_S;
+  }
+  asm_intop(as, ir, op);
+}
+
+/* Integer negation: dest = -op1, using the 64 or 32 bit form by type. */
+static void asm_intneg(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg src = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  A64Ins ai = irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw;
+  emit_dm(as, ai, dest, src);
+}
+
+/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k.
+** Integer multiply. The guarded variant (MULOV) multiplies with SMULL and
+** checks that the upper 32 bits of the product equal the sign of the
+** lower 32 bits. Emit calls are listed in reverse execution order.
+*/
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  if (irt_isguard(ir->t)) {  /* IR_MULOV */
+    asm_guardcc(as, CC_NE);
+    emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
+    emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);  /* Compare TMP with dest >> 31 (arithmetic). */
+    emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);  /* TMP = upper half of the product. */
+    emit_dnm(as, A64I_SMULL, dest, right, left);
+  } else {
+    emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
+  }
+}
+
+/* ADD: integer adds go through asm_intop_s(); FP adds first try to fuse
+** with a multiply into FMADD and fall back to a plain FADD.
+*/
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  if (!irt_isnum(ir->t)) {
+    asm_intop_s(as, ir, A64I_ADDw);
+  } else if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) {
+    asm_fparith(as, ir, A64I_FADDd);
+  }
+}
+
+/* SUB: integer subtracts go through asm_intop_s(); FP subtracts first try
+** to fuse with a multiply (FNMSUB/FMSUB) and fall back to a plain FSUB.
+*/
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (!irt_isnum(ir->t)) {
+    asm_intop_s(as, ir, A64I_SUBw);
+  } else if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) {
+    asm_fparith(as, ir, A64I_FSUBd);
+  }
+}
+
+/* MUL: dispatch to the FP or the integer multiply by result type. */
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (!irt_isnum(ir->t))
+    asm_intmul(as, ir);
+  else
+    asm_fparith(as, ir, A64I_FMULd);
+}
+
+/* DIV: FP division is a single FDIV. With the FFI enabled, non-number
+** (64 bit integer) division is done by a call, signed or unsigned by type.
+*/
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_HASFFI
+  if (!irt_isnum(ir->t)) {
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+    return;
+  }
+#endif
+  asm_fparith(as, ir, A64I_FDIVd);
+}
+
+/* POW: always a call. With the FFI enabled, non-number (64 bit integer)
+** operands use the signed or unsigned carith helper; otherwise lj_vm_powi.
+*/
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_HASFFI
+  if (!irt_isnum(ir->t)) {
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+    return;
+  }
+#endif
+  asm_callid(as, ir, IRCALL_lj_vm_powi);
+}
+
+#define asm_addov(as, ir)	asm_add(as, ir)  /* Overflow-checking variants share the plain */
+#define asm_subov(as, ir)	asm_sub(as, ir)  /* handlers; the guard is added when the */
+#define asm_mulov(as, ir)	asm_mul(as, ir)  /* instruction type is marked as a guard. */
+
+#define asm_abs(as, ir)		asm_fpunary(as, ir, A64I_FABS)  /* Single FP instruction. */
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)  /* Call out. */
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)  /* Call out. */
+
+/* MOD: always a call. With the FFI enabled, non-int (64 bit integer)
+** operands use the signed or unsigned carith helper; otherwise lj_vm_modi.
+*/
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_HASFFI
+  if (!irt_isint(ir->t)) {
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+    return;
+  }
+#endif
+  asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+/* NEG: dispatch to the FP or the integer negation by result type. */
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (!irt_isnum(ir->t))
+    asm_intneg(as, ir);
+  else
+    asm_fpunary(as, ir, A64I_FNEGd);
+}
+
+/* BAND: first try to fuse with a shift. Otherwise emit AND, or ANDS when
+** the compare with zero remembered in as->flagmcp can be dropped.
+*/
+static void asm_band(ASMState *as, IRIns *ir)
+{
+  if (!asm_fuseandshift(as, ir)) {
+    int setflags = (as->mcp == as->flagmcp);
+    if (setflags) {
+      /* Try to drop cmp r, #0. */
+      as->mcp++;
+      as->flagmcp = NULL;
+    }
+    asm_intop(as, ir, setflags ? A64I_ANDSw : A64I_ANDw);
+  }
+}
+
+static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
+{  /* Shared handler for BOR/BXOR. A fusable BNOT operand is folded into
+   ** the instruction by setting A64I_ON and using the BNOT's own operand. */
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irl = IR(lref), *irr = IR(rref);
+  if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
+      (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
+    Reg left, dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m;
+    if (irl->o == IR_BNOT) {  /* Move the BNOT to the right-hand side. */
+      IRRef tmp = lref; lref = rref; rref = tmp;
+    }
+    left = ra_alloc1(as, lref, RSET_GPR);
+    ai |= A64I_ON;
+    if (irt_is64(ir->t)) ai |= A64I_X;
+    m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));  /* Operand of the BNOT, not the BNOT itself. */
+    emit_dn(as, ai^m, dest, left);
+  } else {
+    asm_intop(as, ir, ai);
+  }
+}
+
+/* BOR: try to fuse with shifts first, else use the shared BOR/BXOR path. */
+static void asm_bor(ASMState *as, IRIns *ir)
+{
+  if (!asm_fuseorshift(as, ir))
+    asm_borbxor(as, ir, A64I_ORRw);
+}
+
+#define asm_bxor(as, ir)	asm_borbxor(as, ir, A64I_EORw)  /* BXOR uses the shared BOR/BXOR handler. */
+
+static void asm_bnot(ASMState *as, IRIns *ir)
+{  /* BNOT: dest = ~op1 via MVN; the operand may be fused by asm_fuseopm(). */
+  A64Ins ai = A64I_MVNw;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);  /* Note: called with the 32 bit opcode. */
+  if (irt_is64(ir->t)) ai |= A64I_X;  /* 64 bit form is selected only after fusing. */
+  emit_d(as, ai^m, dest);
+}
+
+/* BSWAP: byte-reverse op1 into dest with the 64 or 32 bit REV. */
+static void asm_bswap(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg src = ra_alloc1(as, ir->op1, RSET_GPR);
+  A64Ins ai = irt_is64(ir->t) ? A64I_REVx : A64I_REVw;
+  emit_dn(as, ai, dest, src);
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
+{  /* Shifts and rotates. Constant shift counts are encoded as bitfield
+   ** moves (or EXTR for rotates) using 'ai'; variable counts use the
+   ** register shift instructions selected by 'sh'. */
+  int32_t shmask = irt_is64(ir->t) ? 63 : 31;
+  if (irref_isk(ir->op2)) {  /* Constant shifts. */
+    Reg left, dest = ra_dest(as, ir, RSET_GPR);
+    int32_t shift = (IR(ir->op2)->i & shmask);
+    IRIns *irl = IR(ir->op1);
+    if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;  /* Convert the 32 bit opcode to its 64 bit form. */
+
+    /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
+    if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
+      if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
+	int32_t shift2 = (IR(irl->op2)->i & shmask);
+	shift = ((shift - shift2) & shmask);
+	shmask -= shift2;
+	ir = irl;  /* Use the operand of the inner BSHL from here on. */
+      }
+    }
+
+    left = ra_alloc1(as, ir->op1, RSET_GPR);
+    switch (sh) {
+    case A64SH_LSL:
+      emit_dn(as, ai | A64F_IMMS(shmask-shift) |
+		  A64F_IMMR((shmask-shift+1)&shmask), dest, left);
+      break;
+    case A64SH_LSR: case A64SH_ASR:
+      emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
+      break;
+    case A64SH_ROR:
+      emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);  /* Same register for both source operands. */
+      break;
+    }
+  } else {  /* Variable-length shifts. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
+  }
+}
+
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)  /* Left shift. */
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)  /* Logical right shift. */
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)  /* Arithmetic right shift. */
+#define asm_bror(as, ir)	asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)  /* Rotate right. */
+#define asm_brol(as, ir)	lua_assert(0)  /* BROL is not expected to reach this backend. */
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
+{  /* Integer min/max: compare, then select left if 'cc' holds, else right.
+   ** Emit calls are listed in reverse execution order (CMP runs first). */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
+  emit_nm(as, A64I_CMPw, left, right);
+}
+
+static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
+{  /* FP min/max: compare, then select left if 'fcc' holds, else right.
+   ** Emit calls are listed in reverse execution order (FCMP runs first). */
+  Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);  /* Both operand registers packed into one value. */
+  right = ((left >> 8) & 31); left &= 31;
+  emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right);
+  emit_nm(as, A64I_FCMPd, left, right);
+}
+
+/* MIN/MAX dispatcher: 'cc' is the condition for the integer variant,
+** 'fcc' the one for the FP variant.
+*/
+static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
+{
+  if (!irt_isnum(ir->t)) {
+    asm_intmin_max(as, ir, cc);
+    return;
+  }
+  asm_fpmin_max(as, ir, fcc);
+}
+
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)  /* Integer cond, FP cond. */
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)  /* Integer cond, FP cond. */
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Map of comparisons to flags. ORDER IR.
+** Indexed by the IR opcode of the comparison. The low nibble holds the
+** condition used for integer compares (read with & 15), the high nibble
+** the condition used for FP compares (read with >> 4). These conditions
+** are the ones handed to the guard, e.g. LT maps to CC_GE.
+*/
+static const uint8_t asm_compmap[IR_ABC+1] = {
+  /* op  FP swp  int cc   FP cc */
+  /* LT       */ CC_GE + (CC_HS << 4),
+  /* GE    x  */ CC_LT + (CC_HI << 4),
+  /* LE       */ CC_GT + (CC_HI << 4),
+  /* GT    x  */ CC_LE + (CC_HS << 4),
+  /* ULT   x  */ CC_HS + (CC_LS << 4),
+  /* UGE      */ CC_LO + (CC_LO << 4),
+  /* ULE   x  */ CC_HI + (CC_LO << 4),
+  /* UGT      */ CC_LS + (CC_LS << 4),
+  /* EQ       */ CC_NE + (CC_NE << 4),
+  /* NE       */ CC_EQ + (CC_EQ << 4),
+  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
+};
+
+/* FP comparisons.
+** Emits an FCMP (or FCMPZ against a constant +0) followed by a guard on
+** the FP condition taken from asm_compmap. For the ops marked 'x' in that
+** table the operands of the compare are swapped.
+*/
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+  Reg left, right;
+  A64Ins ai;
+  int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);  /* Swap flag derived from the opcode bits. */
+  if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {  /* Right operand is the constant with all bits zero. */
+    left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
+    right = 0;
+    ai = A64I_FCMPZd;
+  } else {
+    left = ra_alloc2(as, ir, RSET_FPR);  /* Both operand registers packed into one value. */
+    if (swp) {
+      right = (left & 31); left = ((left >> 8) & 31);
+    } else {
+      right = ((left >> 8) & 31); left &= 31;
+    }
+    ai = A64I_FCMPd;
+  }
+  asm_guardcc(as, (asm_compmap[ir->o] >> 4));
+  emit_nm(as, ai, left, right);
+}
+
+/* Integer comparisons.
+** Emits a compare plus guard on the integer condition from asm_compmap.
+** Comparisons against zero are combined where possible: a preceding unused
+** BAND becomes TST or TBZ/TBNZ, and EQ/NE become CBZ/CBNZ. Otherwise the
+** compare is remembered in as->flagmcp so a following op may absorb it.
+*/
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+  A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
+  A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
+  IRRef lref = ir->op1, rref = ir->op2;
+  Reg left;
+  uint32_t m;
+  int cmpprev0 = 0;  /* Set when comparing the previous instruction against zero. */
+  lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
+	     irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
+  if (asm_swapops(as, lref, rref)) {  /* Swapping operands requires mirroring the condition. */
+    IRRef tmp = lref; lref = rref; rref = tmp;
+    if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
+    else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
+  }
+  oldcc = cc;  /* Condition before the PL/MI substitution below. */
+  if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
+    IRIns *irl = IR(lref);
+    if (cc == CC_GE) cc = CC_PL;
+    else if (cc == CC_LT) cc = CC_MI;
+    else if (cc > CC_NE) goto nocombine;  /* Other conds don't work with tst. */
+    cmpprev0 = (irl+1 == ir);  /* Left operand is the immediately preceding instruction. */
+    /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
+    if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
+      IRRef blref = irl->op1, brref = irl->op2;
+      uint32_t m2 = 0;
+      Reg bleft;
+      if (asm_swapops(as, blref, brref)) {
+	Reg tmp = blref; blref = brref; brref = tmp;
+      }
+      if (irref_isk(brref)) {
+	uint64_t k = get_k64val(IR(brref));
+	if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {  /* Mask has exactly one bit set. */
+	  asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
+		       ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
+	  return;
+	}
+	m2 = emit_isk13(k, irt_is64(irl->t));  /* Zero if no immediate encoding was found. */
+      }
+      bleft = ra_alloc1(as, blref, RSET_GPR);
+      ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
+      if (!m2)
+	m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
+      asm_guardcc(as, cc);
+      emit_n(as, ai^m2, bleft);
+      return;
+    }
+    if (cc == CC_EQ || cc == CC_NE) {
+      /* Combine cmp-bcc into cbz/cbnz. */
+      ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
+      if (irt_is64(ir->t)) ai |= A64I_X;
+      asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
+      return;
+    }
+  }
+nocombine:
+  left = ra_alloc1(as, lref, RSET_GPR);
+  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+  asm_guardcc(as, cc);
+  emit_n(as, ai^m, left);
+  /* Signed comparison with zero and referencing previous ins? */
+  if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
+    as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
+}
+
+/* Comparison dispatcher: number comparisons use the FP path, all other
+** types use the integer path.
+*/
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+  if (!irt_isnum(ir->t)) {
+    asm_intcomp(as, ir);
+    return;
+  }
+  asm_fpcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)  /* EQ/NE share the generic comparison handler. */
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op.
+** Only asserts here, since it is not used on 64 bit.
+*/
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  UNUSED(as);
+  lua_assert(0);  /* Unused on 64 bit. */
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{  /* Profiler hook check: load g->hookmask, test the HOOK_PROFILE bit and
+   ** guard on the result. Emit calls are in reverse execution order. */
+  uint32_t k = emit_isk13(HOOK_PROFILE, 0);
+  lua_assert(k != 0);  /* The mask must have an immediate encoding. */
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, A64I_TSTw^k, RID_TMP);
+  emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback.
+** Computes L->maxstack - base and branches to the exit stub 'exitno' when
+** the result is lower than or equal to 8*topslot (unsigned compare).
+** irp is the parent's BASE instruction for side traces, or NULL for root
+** traces. Emit calls are listed in reverse execution order.
+*/
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  Reg pbase;
+  uint32_t k;
+  if (irp) {
+    if (!ra_hasspill(irp->s)) {
+      pbase = irp->r;
+      lua_assert(ra_hasreg(pbase));
+    } else if (allow) {
+      pbase = rset_pickbot(allow);  /* Any allowed register can hold the reloaded base. */
+    } else {
+      pbase = RID_RET;  /* Nothing free: borrow RID_RET and preserve it on the stack. */
+      emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0);  /* Restore temp register. */
+    }
+  } else {
+    pbase = RID_BASE;
+  }
+  emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
+  k = emit_isk12((8*topslot));
+  lua_assert(k);  /* The slot size must have an immediate encoding. */
+  emit_n(as, A64I_CMPx^k, RID_TMP);
+  emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);  /* TMP = maxstack - base. */
+  emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
+	   (int32_t)offsetof(lua_State, maxstack));  /* TMP = L->maxstack (TMP holds L here). */
+  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
+    if (ra_hasspill(irp->s))
+      emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));  /* Reload the base from its spill slot. */
+    emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));  /* TMP = g->cur_L. */
+    if (ra_hasspill(irp->s) && !allow)
+      emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0);  /* Save temp register. */
+  } else {
+    emit_getgl(as, RID_TMP, cur_L);
+  }
+}
+
+/* Restore Lua stack from on-trace state.
+** Walks the entries of the snapshot and stores every slot not marked
+** SNAP_NORESTORE back to its stack location relative to RID_BASE.
+*/
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+#ifdef LUA_USE_ASSERT
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];  /* Only used by the assertion at the end. */
+#endif
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Byte offset of the slot from BASE. */
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {  /* Numbers are stored straight from an FPR. */
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
+    } else {  /* Everything else is stored as a tagged 64 bit value. */
+      asm_tvstore64(as, RID_BASE, ofs, ref);
+    }
+    checkmclim(as);
+  }
+  lua_assert(map + nent == flinks);
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Check GC threshold and do one or more GC steps.
+** Compares g->gc.total against g->gc.threshold and skips the call to
+** lj_gc_step_jit() via l_end on CC_LS. After the call, a non-zero return
+** value exits the trace. Emit calls are listed in reverse execution order.
+*/
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp1, tmp2;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);  /* Target for skipping the GC step. */
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+  emit_loadi(as, tmp2, as->gcsteps);  /* Number of steps accumulated so far. */
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_cond_branch(as, CC_LS, l_end);
+  emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
+  emit_lso(as, A64I_LDRx, tmp2, tmp1,
+	   (int32_t)offsetof(global_State, gc.threshold));
+  emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
+	   (int32_t)offsetof(global_State, gc.total));
+  ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);  /* tmp1 = address of the global state. */
+  as->gcsteps = 0;  /* Reset the counter for the next check. */
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch.
+** Without loop inversion the last instruction becomes an unconditional
+** branch to the loop start. With loop inversion the displacement is OR-ed
+** into the second-to-last instruction, using a 14 bit field if that
+** instruction matches the 0x36000000 pattern and a 19 bit field otherwise.
+*/
+static void asm_loop_fixup(ASMState *as)
+{
+  MCode *top = as->mctop;
+  MCode *loopstart = as->mcp;
+  if (!as->loopinv) {
+    ptrdiff_t delta = loopstart - (top - 1);
+    top[-1] = A64I_B | A64F_S26(delta);
+  } else {  /* Inverted loop branch? */
+    ptrdiff_t delta = loopstart - (top - 2);
+    uint32_t mask;
+    if ((top[-2] & 0x7e000000) == 0x36000000)
+      mask = 0x3fffu;
+    else
+      mask = 0x7ffffu;
+    /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
+    top[-2] |= ((uint32_t)delta & mask) << 5;
+  }
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Reload L register from g->cur_L.
+** Only needed if the L reference is used by the trace.
+*/
+static void asm_head_lreg(ASMState *as)
+{
+  IRIns *irl = IR(ASMREF_L);
+  Reg r;
+  if (!ra_used(irl))
+    return;
+  r = ra_dest(as, irl, RSET_GPR);
+  emit_getgl(as, r, cur_L);
+  ra_evictk(as);
+}
+
+/* Coalesce BASE register for a root trace.
+** BASE is spilled first if its register was modified or the instruction
+** is marked; then it is assigned to RID_BASE.
+*/
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *irbase;
+  asm_head_lreg(as);
+  irbase = IR(REF_BASE);
+  if (ra_hasreg(irbase->r)) {
+    if (rset_test(as->modset, irbase->r) || irt_ismarked(irbase->t))
+      ra_spill(as, irbase);
+  }
+  ra_destreg(as, irbase, RID_BASE);
+}
+
+/* Coalesce BASE register for a side trace.
+** irp is the parent's BASE instruction. Returns 'allow' with the register
+** chosen for BASE removed.
+*/
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+  IRIns *ir;
+  asm_head_lreg(as);
+  ir = IR(REF_BASE);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
+  if (ra_hasspill(irp->s)) {  /* Parent has BASE in a spill slot: pick any allowed register. */
+    rset_clear(allow, ra_dest(as, ir, allow));
+  } else {  /* Parent has BASE in a register: use the same one. */
+    Reg r = irp->r;
+    lua_assert(ra_hasreg(r));
+    rset_clear(allow, r);
+    if (r != ir->r && !rset_test(as->freeset, r))
+      ra_restore(as, regcost_ref(as->cost[r]));  /* Free r from its current occupant. */
+    ra_destreg(as, ir, r);
+  }
+  return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code.
+** Patches the last two slots below as->mctop. With a non-zero stack
+** adjustment, p[-2] becomes an add to RID_SP and p[-1] the exit branch.
+** With a zero adjustment, the last slot gets a NOP, as->mctop is lowered by
+** one slot and the exit branch goes into the slot below the NOP. The branch
+** targets the machine code of trace lnk, or lj_vm_exit_interp if lnk is 0.
+*/
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *p = as->mctop;
+  MCode *target;
+  /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
+  int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
+  if (spadj == 0) {
+    *--p = A64I_LE(A64I_NOP);  /* p now points at the NOP slot. */
+    as->mctop = p;
+  } else {
+    /* Patch stack adjustment. */
+    uint32_t k = emit_isk12(spadj);
+    lua_assert(k);  /* Presumably: spadj must be encodable -- TODO confirm. */
+    p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+  }
+  /* Patch exit branch. */
+  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  p[-1] = A64I_B | A64F_S26((target-p)+1);  /* +1: the branch sits at p-1. */
+}
+
+/* Prepare tail of code.
+** Reserves the slot just below as->mctop for the exit branch. If as->loopref
+** is set, both as->mcp and as->invmcp point at that slot. Otherwise as->mcp
+** is placed one slot lower and as->invmcp is cleared. The reserved slot is
+** zeroed in either case.
+*/
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
+    as->invmcp = NULL;
+  }
+  *p = 0;  /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments.
+** Walks the arguments collected by asm_collectargs(): FP-typed arguments use
+** up FPR argument registers, everything else (including empty argument
+** entries) uses up GPR argument registers. Each argument that no longer fits
+** into a register adds two stack slots. as->evenspill is raised to the
+** resulting slot count if it is lower. Always returns a hint for RID_RET.
+*/
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
+  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++) {
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      if (nfpr > 0) nfpr--; else nslots += 2;  /* Out of FPRs: use the stack. */
+    } else {
+      if (ngpr > 0) ngpr--; else nslots += 2;  /* Out of GPRs: use the stack. */
+    }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return REGSP_HINT(RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)
+{
+  /* May need extra exit for asm_stack_check on side traces.
+  ** Passes the snapshot count of the trace, plus one if as->parent is set,
+  ** to asm_exitstub_setup().
+  */
+  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+#if LJ_BE
+/* ARM64 instructions are always little-endian. Swap for ARM64BE.
+** Applies lj_bswap() in place to every MCode word in the size bytes that
+** start at mcode. Only compiled if LJ_BE is set, in which case
+** LJ_TARGET_MCODE_FIXUP is defined to 1 as well.
+*/
+static void asm_mcode_fixup(MCode *mcode, MSize size)
+{
+  MCode *pe = (MCode *)((char *)mcode + size);  /* size is a byte count. */
+  while (mcode < pe) {
+    MCode ins = *mcode;
+    *mcode++ = lj_bswap(ins);
+  }
+}
+#define LJ_TARGET_MCODE_FIXUP	1
+#endif
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target.
+** J:      JIT state, passed through to lj_mcode_patch().
+** T:      trace whose machine code (T->szmcode bytes at T->mcode) is scanned.
+** exitno: exit number; the address px of its exit stub is looked up with
+**         exitstub_trace_addr().
+** target: new branch destination.
+** Every bcc, b, cbz/cbnz and tbz/tbnz whose offset field points at px is
+** redirected to target. The conditional forms are only rewritten if target
+** is within their range; otherwise they are left alone and keep branching to
+** px. The instruction at px itself is always overwritten with a branch to
+** target. Finally lj_mcode_sync() is called for the patched range.
+*/
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);
+  MCode *cstart = NULL;  /* First patched location, NULL if none so far. */
+  MCode *mcarea = lj_mcode_patch(J, p, 0);  /* Presumably unprotects; TODO confirm. */
+  MCode *px = exitstub_trace_addr(T, exitno);
+  /* Note: this assumes a trace exit is only ever patched once. */
+  for (; p < pe; p++) {
+    /* Look for exitstub branch, replace with branch to target. */
+    ptrdiff_t delta = target - p;  /* New offset relative to this instruction. */
+    MCode ins = A64I_LE(*p);
+    if ((ins & 0xff000000u) == 0x54000000u &&
+	((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+      /* Patch bcc, if within range. */
+      if (A64F_S_OK(delta, 19)) {
+	*p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
+	if (!cstart) cstart = p;
+      }
+    } else if ((ins & 0xfc000000u) == 0x14000000u &&
+	       ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
+      /* Patch b. */
+      lua_assert(A64F_S_OK(delta, 26));
+      *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
+      if (!cstart) cstart = p;
+    } else if ((ins & 0x7e000000u) == 0x34000000u &&
+	       ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+      /* Patch cbz/cbnz, if within range. */
+      if (A64F_S_OK(delta, 19)) {
+	*p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
+	if (!cstart) cstart = p;
+      }
+    } else if ((ins & 0x7e000000u) == 0x36000000u &&
+	       ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
+      /* Patch tbz/tbnz, if within range. */
+      if (A64F_S_OK(delta, 14)) {
+	*p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
+	if (!cstart) cstart = p;
+      }
+    }
+  }
+  {  /* Always patch long-range branch in exit stub itself.
+     ** NOTE(review): unlike the stores in the loop above, this store is not
+     ** wrapped in A64I_LE() -- confirm this is intended for LJ_BE builds.
+     */
+    ptrdiff_t delta = target - px;
+    lua_assert(A64F_S_OK(delta, 26));
+    *px = A64I_B | A64F_S26(delta);
+    if (!cstart) cstart = px;
+  }
+  lj_mcode_sync(cstart, px+1);  /* From the first patched location to just past px. */
+  lj_mcode_patch(J, mcarea, 1);  /* Presumably restores protection; TODO confirm. */
+}
+

File diff suppressed because it is too large
+ 492 - 92
love/src/jni/LuaJIT-2.1/src/lj_asm_mips.h


+ 298 - 62
love/src/jni/LuaJIT-2.1/src/lj_asm_ppc.h

@@ -1,6 +1,6 @@
 /*
 ** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
   emit_tab(as, pi, rt, left, right);
 }
 
+#if !LJ_SOFTFP
 /* Fuse to multiply-add/sub instruction. */
 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 {
@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
   }
   return 0;
 }
+#endif
 
 /* -- Calls --------------------------------------------------------------- */
 
@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
-  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+  Reg gpr = REGARG_FIRSTGPR;
+#if !LJ_SOFTFP
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
   if ((void *)ci->func)
     emit_call(as, (void *)ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
     IRRef ref = args[n];
     if (ref) {
       IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
       if (irt_isfp(ir->t)) {
 	if (fpr <= REGARG_LASTFPR) {
 	  lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 	  emit_spstore(as, ir, r, ofs);
 	  ofs += irt_isnum(ir->t) ? 8 : 4;
 	}
-      } else {
+      } else
+#endif
+      {
 	if (gpr <= REGARG_LASTGPR) {
 	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
 	  ra_leftov(as, gpr, ref);
@@ -290,17 +298,21 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
     }
     checkmclim(as);
   }
+#if !LJ_SOFTFP
   if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
     emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+#endif
 }
 
 /* Setup result reg/sp for call. Evict scratch regs. */
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-  int hiop = ((ir+1)->o == IR_HIOP);
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
+#endif
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   if (hiop && ra_hasreg((ir+1)->r))
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lua_assert(!irt_ispri(ir->t));
-    if (irt_isfp(ir->t)) {
+    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
       if ((ci->flags & CCI_CASTU64)) {
 	/* Use spill slot or temp slots. */
 	int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 
 /* -- Type conversions ---------------------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 {
   RegSet allow = RSET_FPR;
@@ -393,8 +406,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
   emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
   emit_lsptr(as, PPCI_LFS, (fbias & 31),
-	     (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
-	     RSET_GPR);
+	     (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
   emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
   emit_fb(as, PPCI_FCTIWZ, tmp, left);
 }
@@ -410,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
   emit_fab(as, PPCI_FADD, tmp, left, right);
 }
+#endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
 {
   IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
   IRRef lref = ir->op1;
-  lua_assert(irt_type(ir->t) != st);
   lua_assert(!(irt_isint64(ir->t) ||
 	       (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+#if LJ_SOFTFP
+  /* FP conversions are handled by SPLIT. */
+  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+  lua_assert(irt_type(ir->t) != st);
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
@@ -433,13 +453,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
       Reg left = ra_alloc1(as, lref, allow);
       Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
       Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
-      const float *kbias;
       if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
       emit_fab(as, PPCI_FSUB, dest, dest, fbias);
       emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
-      kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
-      if (st == IRT_U32) kbias++;
-      emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
+      emit_lsptr(as, PPCI_LFS, (fbias & 31),
+		 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
 		 rset_clear(allow, hibias));
       emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
 	       RID_SP, SPOFS_TMPLO);
@@ -472,15 +490,16 @@ static void asm_conv(ASMState *as, IRIns *ir)
 	emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
 	emit_fab(as, PPCI_FSUB, tmp, left, tmp);
 	emit_lsptr(as, PPCI_LFS, (tmp & 31),
-		   (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
-		   RSET_GPR);
+		   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
       } else {
 	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
 	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
 	emit_fb(as, PPCI_FCTIWZ, tmp, left);
       }
     }
-  } else {
+  } else
+#endif
+  {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
       Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -500,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   IRRef args[2];
-  int32_t ofs;
+  int32_t ofs = SPOFS_TMP;
+#if LJ_SOFTFP
+  ra_evictset(as, RSET_SCRATCH);
+  if (ra_used(ir)) {
+    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+	(ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+      int i;
+      for (i = 0; i < 2; i++) {
+	Reg r = (ir+i)->r;
+	if (ra_hasreg(r)) {
+	  ra_free(as, r);
+	  ra_modified(as, r);
+	  emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+	}
+      }
+      ofs = sps_scale(ir->s & ~1);
+    } else {
+      Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+      Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+      emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
+      emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
+    }
+  }
+#else
   RegSet drop = RSET_SCRATCH;
   if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
   ra_evictset(as, drop);
+  if (ir->s) ofs = sps_scale(ir->s);
+#endif
   asm_guardcc(as, CC_EQ);
   emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
   args[0] = ir->op1;      /* GCstr *str */
   args[1] = ASMREF_TMP1;  /* TValue *n  */
   asm_gencall(as, ci, args);
   /* Store the result to the spill slot or temp slots. */
-  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
@@ -534,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
     }
-    type = ra_allock(as, irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, ref+1, allow);
+    else
+      type = ra_allock(as, irt_toitype(ir->t), allow);
     emit_setgl(as, type, tmptv.it);
   }
 }
@@ -578,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   Reg tisnum = RID_NONE, tmpnum = RID_NONE;
   IRRef refkey = ir->op2;
   IRIns *irkey = IR(refkey);
+  int isk = irref_isk(refkey);
   IRType1 kt = irkey->t;
   uint32_t khash;
   MCLabel l_end, l_loop, l_next;
 
   rset_clear(allow, tab);
+#if LJ_SOFTFP
+  if (!isk) {
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+    if (irkey[1].o == IR_HIOP) {
+      if (ra_hasreg((irkey+1)->r)) {
+	tmpnum = (irkey+1)->r;
+	ra_noweak(as, tmpnum);
+      } else {
+	tmpnum = ra_allocref(as, refkey+1, allow);
+      }
+      rset_clear(allow, tmpnum);
+    }
+  }
+#else
   if (irt_isnum(kt)) {
     key = ra_alloc1(as, refkey, RSET_FPR);
     tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -592,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     key = ra_alloc1(as, refkey, allow);
     rset_clear(allow, key);
   }
+#endif
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 
@@ -614,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     asm_guardcc(as, CC_EQ);
   else
     emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
-  if (irt_isnum(kt)) {
+  if (!LJ_SOFTFP && irt_isnum(kt)) {
     emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
     emit_condbranch(as, PPCI_BC, CC_GE, l_next);
     emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -624,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_ab(as, PPCI_CMPW, tmp2, key);
       emit_condbranch(as, PPCI_BC, CC_NE, l_next);
     }
-    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+    if (LJ_SOFTFP && ra_hasreg(tmpnum))
+      emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
+    else
+      emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
     if (!irt_ispri(kt))
       emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
   }
@@ -633,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 	    (((char *)as->mcp-(char *)l_loop) & 0xffffu);
 
   /* Load main position relative to tab->node into dest. */
-  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  khash = isk ? ir_khash(irkey) : 1;
   if (khash == 0) {
     emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
   } else {
     Reg tmphash = tmp1;
-    if (irref_isk(refkey))
+    if (isk)
       tmphash = ra_allock(as, khash, allow);
     emit_tab(as, PPCI_ADD, dest, dest, tmp1);
     emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
     emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
     emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
     emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
-    if (irref_isk(refkey)) {
+    if (isk) {
       /* Nothing to do. */
     } else if (irt_isstr(kt)) {
       emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -655,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
       emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
       emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
-      if (irt_isnum(kt)) {
+      if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
+#if LJ_SOFTFP
+	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+	emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
+#else
 	int32_t ofs = ra_spill(as, irkey);
 	emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
 	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
 	emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
 	emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
 	emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+#endif
       } else {
 	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
 	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -717,7 +789,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 
 static void asm_uref(ASMState *as, IRIns *ir)
 {
-  /* NYI: Check that UREFO is still open and not aliasing a slot. */
   Reg dest = ra_dest(as, ir, RSET_GPR);
   if (irref_isk(ir->op1)) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
@@ -789,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
   case IRT_U8: return PPCI_LBZ;
   case IRT_I16: return PPCI_LHA;
   case IRT_U16: return PPCI_LHZ;
-  case IRT_NUM: return PPCI_LFD;
-  case IRT_FLOAT: return PPCI_LFS;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
   default: return PPCI_LWZ;
   }
 }
@@ -800,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
   switch (irt_type(ir->t)) {
   case IRT_I8: case IRT_U8: return PPCI_STB;
   case IRT_I16: case IRT_U16: return PPCI_STH;
-  case IRT_NUM: return PPCI_STFD;
-  case IRT_FLOAT: return PPCI_STFS;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
   default: return PPCI_STW;
   }
 }
@@ -809,17 +880,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
 static void asm_fload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
   PPCIns pi = asm_fxloadins(ir);
+  Reg idx;
   int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
-      return;
+  if (ir->op1 == REF_NIL) {
+    idx = RID_JGL;
+    ofs = (ir->op2 << 2) - 32768;
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+	emit_tai(as, PPCI_ADDI, dest, idx, ofs);
+	return;
+      }
     }
+    ofs = field_ofs[ir->op2];
   }
-  ofs = field_ofs[ir->op2];
   lua_assert(!irt_isi8(ir->t));
   emit_tai(as, pi, dest, idx, ofs);
 }
@@ -838,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir)
 
 static void asm_xload(ASMState *as, IRIns *ir)
 {
-  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  Reg dest = ra_dest(as, ir,
+    (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
   if (irt_isi8(ir->t))
     emit_as(as, PPCI_EXTSB, dest, dest);
@@ -856,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
     Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
     asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
   } else {
-    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    Reg src = ra_alloc1(as, ir->op2,
+      (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
 		 rset_exclude(RSET_GPR, src), ofs);
   }
@@ -870,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
   RegSet allow = RSET_GPR;
   int32_t ofs = AHUREF_LSX;
+  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
+    t.irt = IRT_NUM;
+    if (ra_used(ir+1)) {
+      type = ra_dest(as, ir+1, allow);
+      rset_clear(allow, type);
+    }
+    ofs = 0;
+  }
   if (ra_used(ir)) {
-    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    if (!irt_isnum(t)) ofs = 0;
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+	       irt_isint(ir->t) || irt_isaddr(ir->t));
+    if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
     rset_clear(allow, dest);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
@@ -882,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     asm_guardcc(as, CC_GE);
     emit_ab(as, PPCI_CMPLW, type, tisnum);
     if (ra_hasreg(dest)) {
-      if (ofs == AHUREF_LSX) {
+      if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
 	tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
 						       (idx&255)), (idx>>8)));
 	emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
       } else {
-	emit_fai(as, PPCI_LFD, dest, idx, ofs);
+	emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
+		 ofs+4*LJ_SOFTFP);
       }
     }
   } else {
@@ -910,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
   int32_t ofs = AHUREF_LSX;
   if (ir->r == RID_SINK)
     return;
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     src = ra_alloc1(as, ir->op2, RSET_FPR);
   } else {
     if (!irt_ispri(ir->t)) {
@@ -918,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
       rset_clear(allow, src);
       ofs = 0;
     }
-    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, (ir+1)->op2, allow);
+    else
+      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
     rset_clear(allow, type);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     if (ofs == AHUREF_LSX) {
       emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
       emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -947,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir)
   IRType1 t = ir->t;
   Reg dest = RID_NONE, type = RID_NONE, base;
   RegSet allow = RSET_GPR;
+  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+  if (hiop)
+    t.irt = IRT_NUM;
   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
-  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
   lua_assert(LJ_DUALNUM ||
 	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+#if LJ_SOFTFP
+  lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
+  if (hiop && ra_used(ir+1)) {
+    type = ra_dest(as, ir+1, allow);
+    rset_clear(allow, type);
+  }
+#else
   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
     dest = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, dest);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-  } else if (ra_used(ir)) {
+  } else
+#endif
+  if (ra_used(ir)) {
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
     rset_clear(allow, dest);
     base = ra_alloc1(as, REF_BASE, allow);
     rset_clear(allow, base);
-    if ((ir->op2 & IRSLOAD_CONVERT)) {
+    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
       if (irt_isint(t)) {
 	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
 	dest = ra_scratch(as, RSET_FPR);
@@ -975,7 +1079,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
 	emit_fab(as, PPCI_FSUB, dest, dest, fbias);
 	emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
 	emit_lsptr(as, PPCI_LFS, (fbias & 31),
-		   (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+		   (void *)&as->J->k32[LJ_K32_2P52_2P31],
 		   rset_clear(allow, hibias));
 	emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
 	emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -993,10 +1097,13 @@ dotypecheck:
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
       asm_guardcc(as, CC_GE);
-      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+#if !LJ_SOFTFP
       type = RID_TMP;
+#endif
+      emit_ab(as, PPCI_CMPLW, type, tisnum);
     }
-    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+    if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
+				  base, ofs-(LJ_SOFTFP?0:4));
   } else {
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       asm_guardcc(as, CC_NE);
@@ -1118,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
 
 /* -- Arithmetic and logic operations ------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1145,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   else
     asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#endif
 
 static void asm_add(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
       asm_fparith(as, ir, PPCI_FADD);
-  } else {
+  } else
+#endif
+  {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
     PPCIns pi;
@@ -1190,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir)
 
 static void asm_sub(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
       asm_fparith(as, ir, PPCI_FSUB);
-  } else {
+  } else
+#endif
+  {
     PPCIns pi = PPCI_SUBF;
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg left, right;
@@ -1219,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
 
 static void asm_mul(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     asm_fparith(as, ir, PPCI_FMUL);
-  } else {
+  } else
+#endif
+  {
     PPCIns pi = PPCI_MULLW;
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1249,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
 
 static void asm_neg(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     asm_fpunary(as, ir, PPCI_FNEG);
-  } else {
+  } else
+#endif
+  {
     Reg dest, left;
     PPCIns pi = PPCI_NEG;
     if (as->flagmcp == as->mcp) {
@@ -1562,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
 #define asm_bror(as, ir)	lua_assert(0)
 
+#if LJ_SOFTFP  /* Soft-float MIN/MAX on a number split into two words.
+** ir is the IR_MIN/IR_MAX instruction and ir+1 the instruction after it
+** (asm_hiop() passes the predecessor of the HIOP, so ir+1 is the HIOP).
+** The op1/op2 operands of both instructions are passed to softfp_cmp and
+** the value in RID_RET is compared against 1. Depending on that, either the
+** left or the right operand registers are moved to the destinations.
+** NOTE(review): as->mcp is compared against and incremented to drop an
+** instruction, which suggests code is emitted backwards and the statements
+** below run in reverse execution order -- confirm against the emitter.
+*/
+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+{
+  CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
+  IRRef args[4];
+  MCLabel l_right, l_end;
+  Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
+  Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
+  Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
+  PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
+  righthi = (lefthi >> 8); lefthi &= 255;  /* Unpack the ra_alloc2() result. */
+  rightlo = (leftlo >> 8); leftlo &= 255;  /* Unpack the ra_alloc2() result. */
+  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+  l_end = emit_label(as);
+  if (desthi != righthi) emit_mr(as, desthi, righthi);
+  if (destlo != rightlo) emit_mr(as, destlo, rightlo);
+  l_right = emit_label(as);
+  if (l_end != l_right) emit_jmp(as, l_end);  /* Not needed if both labels coincide. */
+  if (desthi != lefthi) emit_mr(as, desthi, lefthi);
+  if (destlo != leftlo) emit_mr(as, destlo, leftlo);
+  if (l_right == as->mcp+1) {  /* Exactly one instruction since l_right? */
+    cond ^= 4; l_right = l_end; ++as->mcp;  /* Drop it, flip cond bit 2, go to l_end. */
+  }
+  emit_condbranch(as, PPCI_BC, cond, l_right);
+  ra_evictset(as, RSET_SCRATCH);
+  emit_cmpi(as, RID_RET, 1);
+  asm_gencall(as, &ci, args);
+}
+#endif
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg tmp = dest;
     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1652,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
 static void asm_comp(ASMState *as, IRIns *ir)
 {
   PPCCC cc = asm_compmap[ir->o];
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
     right = (left >> 8); left &= 255;
     asm_guardcc(as, (cc >> 4));
@@ -1673,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir)
 
 #define asm_equal(as, ir)	asm_comp(as, ir)
 
+#if LJ_SOFTFP
+/* SFP comparisons.
+** ir is the comparison instruction and ir+1 the instruction after it. The
+** op1/op2 operands of both are passed as the four arguments of softfp_cmp
+** (the order within each pair depends on LJ_BE). Guards are then emitted on
+** the value in RID_RET, chosen by the comparison opcode. Two of the switch
+** cases have no break and continue into the next case.
+*/
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+  RegSet drop = RSET_SCRATCH;
+  Reg r;
+  IRRef args[4];
+  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+
+  for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {  /* First four GPR args. */
+    if (!rset_test(as->freeset, r) &&
+	regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+      rset_clear(drop, r);  /* Already holds its own argument: don't evict. */
+  }
+  ra_evictset(as, drop);
+  asm_setupresult(as, ir, ci);
+  switch ((IROp)ir->o) {
+  case IR_ULT:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* No break: continues into IR_ULE. */
+  case IR_ULE:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 1);
+    break;
+  case IR_GE: case IR_GT:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 2);  /* No break: continues into default. */
+  default:
+    asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
+    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+    break;
+  }
+  asm_gencall(as, ci, args);
+}
+#endif
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1702,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir)
 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI
+#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
+#if LJ_HASFFI && !LJ_SOFTFP
     if (usehi || uselo)
       asm_conv64(as, ir);
     return;
+#endif
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+    if (!irt_isint(ir->t)) {
+      asm_sfpcomp(as, ir-1);
+      return;
+    }
+#endif
+#if LJ_HASFFI
     asm_comp64(as, ir);
+#endif
+    return;
+#if LJ_SOFTFP
+  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+      as->curins--;  /* Always skip the loword min/max. */
+    if (uselo || usehi)
+      asm_sfpmin_max(as, ir-1);
     return;
+#endif
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
@@ -1725,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
+#if LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+#endif
+#if LJ_SOFTFP
+  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+  case IR_STRTO:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
+    break;
+#endif
   case IR_CALLN:
+  case IR_CALLS:
   case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
+#if LJ_SOFTFP
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+#endif
   case IR_CNEWI:
     /* Nothing to do here. Handled by lo op itself. */
     break;
@@ -1796,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
     if ((sn & SNAP_NORESTORE))
       continue;
     if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+      Reg tmp;
+      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+      lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
+      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
+      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
+      if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
+      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
+      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
+#else
       Reg src = ra_alloc1(as, ref, RSET_FPR);
       emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
+#endif
     } else {
       Reg type;
       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
@@ -1810,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
 	type = ra_allock(as, (int32_t)(*flinks--), allow);
+#if LJ_SOFTFP
+      } else if ((sn & SNAP_SOFTFPNUM)) {
+	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
+#endif
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
       }
@@ -1946,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)
-    if (args[i] && irt_isfp(IR(args[i])->t)) {
+    if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
       if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
     } else {
       if (ngpr > 0) ngpr--; else nslots++;
     }
   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
     as->evenspill = nslots;
-  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+  return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
+					   REGSP_HINT(RID_RET);
 }
 
 static void asm_setup_target(ASMState *as)

File diff suppressed because it is too large
+ 428 - 78
love/src/jni/LuaJIT-2.1/src/lj_asm_x86.h


+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lj_bc.c

@@ -1,6 +1,6 @@
 /*
 ** Bytecode instruction modes.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_bc_c

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lj_bc.h

@@ -1,6 +1,6 @@
 /*
 ** Bytecode instruction format.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_BC_H

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lj_bcdump.h

@@ -1,6 +1,6 @@
 /*
 ** Bytecode dump definitions.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_BCDUMP_H

+ 1 - 1
love/src/jni/LuaJIT-2.1/src/lj_bcread.c

@@ -1,6 +1,6 @@
 /*
 ** Bytecode reader.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_bcread_c

Some files were not shown because too many files changed in this diff