Преглед на файлове

LuaJIT and MaxLuaJIT. Initial Import.
Linux and OS X libs.

woollybah преди 10 години
родител
ревизия
0c55812d35
променени са 100 файла, в които са добавени 53150 реда и са изтрити 0 реда
  1. 20 0
      .gitignore
  2. 22 0
      luajit.mod/examples/ffi.lua
  3. 37 0
      luajit.mod/examples/ffi_library.bmx
  4. 30 0
      luajit.mod/examples/image.lua
  5. 27 0
      luajit.mod/examples/image_plain_lua.lua
  6. 14 0
      luajit.mod/examples/mandelbrot.bmx
  7. 74 0
      luajit.mod/examples/mandelbrot.lua
  8. 5 0
      luajit.mod/examples/messagebox.lua
  9. 1 0
      luajit.mod/examples/metamethods.lua
  10. 422 0
      luajit.mod/examples/scimark.lua
  11. 17 0
      luajit.mod/examples/sieve.bmx
  12. 38 0
      luajit.mod/examples/sieve.lua
  13. 51 0
      luajit.mod/glue.c
  14. BIN
      luajit.mod/lib/linux/libluajit_x64.a
  15. BIN
      luajit.mod/lib/linux/libluajit_x86.a
  16. BIN
      luajit.mod/lib/macos/libluajit_x64.a
  17. BIN
      luajit.mod/lib/macos/libluajit_x86.a
  18. 1085 0
      luajit.mod/luajit.bmx
  19. 56 0
      luajit.mod/luajit/COPYRIGHT
  20. 151 0
      luajit.mod/luajit/Makefile
  21. 16 0
      luajit.mod/luajit/README
  22. 166 0
      luajit.mod/luajit/doc/bluequad-print.css
  23. 325 0
      luajit.mod/luajit/doc/bluequad.css
  24. 978 0
      luajit.mod/luajit/doc/changes.html
  25. 102 0
      luajit.mod/luajit/doc/contact.html
  26. 187 0
      luajit.mod/luajit/doc/ext_c_api.html
  27. 330 0
      luajit.mod/luajit/doc/ext_ffi.html
  28. 566 0
      luajit.mod/luajit/doc/ext_ffi_api.html
  29. 1245 0
      luajit.mod/luajit/doc/ext_ffi_semantics.html
  30. 601 0
      luajit.mod/luajit/doc/ext_ffi_tutorial.html
  31. 199 0
      luajit.mod/luajit/doc/ext_jit.html
  32. 408 0
      luajit.mod/luajit/doc/extensions.html
  33. 184 0
      luajit.mod/luajit/doc/faq.html
  34. BIN
      luajit.mod/luajit/doc/img/contact.png
  35. 646 0
      luajit.mod/luajit/doc/install.html
  36. 234 0
      luajit.mod/luajit/doc/luajit.html
  37. 306 0
      luajit.mod/luajit/doc/running.html
  38. 116 0
      luajit.mod/luajit/doc/status.html
  39. 456 0
      luajit.mod/luajit/dynasm/dasm_arm.h
  40. 1125 0
      luajit.mod/luajit/dynasm/dasm_arm.lua
  41. 416 0
      luajit.mod/luajit/dynasm/dasm_mips.h
  42. 953 0
      luajit.mod/luajit/dynasm/dasm_mips.lua
  43. 412 0
      luajit.mod/luajit/dynasm/dasm_ppc.h
  44. 1249 0
      luajit.mod/luajit/dynasm/dasm_ppc.lua
  45. 83 0
      luajit.mod/luajit/dynasm/dasm_proto.h
  46. 12 0
      luajit.mod/luajit/dynasm/dasm_x64.lua
  47. 471 0
      luajit.mod/luajit/dynasm/dasm_x86.h
  48. 1945 0
      luajit.mod/luajit/dynasm/dasm_x86.lua
  49. 1094 0
      luajit.mod/luajit/dynasm/dynasm.lua
  50. 88 0
      luajit.mod/luajit/etc/luajit.1
  51. 25 0
      luajit.mod/luajit/etc/luajit.pc
  52. 684 0
      luajit.mod/luajit/src/Makefile
  53. 226 0
      luajit.mod/luajit/src/Makefile.dep
  54. 4 0
      luajit.mod/luajit/src/host/README
  55. 516 0
      luajit.mod/luajit/src/host/buildvm.c
  56. 104 0
      luajit.mod/luajit/src/host/buildvm.h
  57. 313 0
      luajit.mod/luajit/src/host/buildvm_asm.c
  58. 229 0
      luajit.mod/luajit/src/host/buildvm_fold.c
  59. 398 0
      luajit.mod/luajit/src/host/buildvm_lib.c
  60. 368 0
      luajit.mod/luajit/src/host/buildvm_peobj.c
  61. 428 0
      luajit.mod/luajit/src/host/genminilua.lua
  62. 7770 0
      luajit.mod/luajit/src/host/minilua.c
  63. 191 0
      luajit.mod/luajit/src/jit/bc.lua
  64. 659 0
      luajit.mod/luajit/src/jit/bcsave.lua
  65. 689 0
      luajit.mod/luajit/src/jit/dis_arm.lua
  66. 428 0
      luajit.mod/luajit/src/jit/dis_mips.lua
  67. 20 0
      luajit.mod/luajit/src/jit/dis_mipsel.lua
  68. 591 0
      luajit.mod/luajit/src/jit/dis_ppc.lua
  69. 20 0
      luajit.mod/luajit/src/jit/dis_x64.lua
  70. 836 0
      luajit.mod/luajit/src/jit/dis_x86.lua
  71. 699 0
      luajit.mod/luajit/src/jit/dump.lua
  72. 167 0
      luajit.mod/luajit/src/jit/v.lua
  73. 167 0
      luajit.mod/luajit/src/lauxlib.h
  74. 356 0
      luajit.mod/luajit/src/lib_aux.c
  75. 683 0
      luajit.mod/luajit/src/lib_base.c
  76. 74 0
      luajit.mod/luajit/src/lib_bit.c
  77. 405 0
      luajit.mod/luajit/src/lib_debug.c
  78. 851 0
      luajit.mod/luajit/src/lib_ffi.c
  79. 55 0
      luajit.mod/luajit/src/lib_init.c
  80. 539 0
      luajit.mod/luajit/src/lib_io.c
  81. 663 0
      luajit.mod/luajit/src/lib_jit.c
  82. 233 0
      luajit.mod/luajit/src/lib_math.c
  83. 287 0
      luajit.mod/luajit/src/lib_os.c
  84. 602 0
      luajit.mod/luajit/src/lib_package.c
  85. 940 0
      luajit.mod/luajit/src/lib_string.c
  86. 300 0
      luajit.mod/luajit/src/lib_table.c
  87. 26 0
      luajit.mod/luajit/src/lj.supp
  88. 1396 0
      luajit.mod/luajit/src/lj_alloc.c
  89. 17 0
      luajit.mod/luajit/src/lj_alloc.h
  90. 1200 0
      luajit.mod/luajit/src/lj_api.c
  91. 437 0
      luajit.mod/luajit/src/lj_arch.h
  92. 1920 0
      luajit.mod/luajit/src/lj_asm.c
  93. 17 0
      luajit.mod/luajit/src/lj_asm.h
  94. 2361 0
      luajit.mod/luajit/src/lj_asm_arm.h
  95. 1977 0
      luajit.mod/luajit/src/lj_asm_mips.h
  96. 2169 0
      luajit.mod/luajit/src/lj_asm_ppc.h
  97. 2806 0
      luajit.mod/luajit/src/lj_asm_x86.h
  98. 14 0
      luajit.mod/luajit/src/lj_bc.c
  99. 261 0
      luajit.mod/luajit/src/lj_bc.h
  100. 66 0
      luajit.mod/luajit/src/lj_bcdump.h

+ 20 - 0
.gitignore

@@ -0,0 +1,20 @@
+*.x86.a
+*.x86.i
+*.bak
+*.x64.a
+*.x64.i
+*.arm.a
+*.arm.i
+*.armeabi.a
+*.armeabi.i
+*.armeabiv7a.a
+*.armeabiv7a.i
+*.arm64v8a.a
+*.arm64v8a.i
+*.js.a
+*.js.i
+*.armv7.a
+*.armv7.i
+*.arm64.a
+*.arm64.i
+.bmx/

+ 22 - 0
luajit.mod/examples/ffi.lua

@@ -0,0 +1,22 @@
+local ffi = require("ffi")
+ffi.cdef[[
+void Sleep(int ms);
+int poll(struct pollfd *fds, unsigned long nfds, int timeout);
+]]
+
+local sleep
+if ffi.os == "Windows" then
+  function sleep(s)
+    ffi.C.Sleep(s*1000)
+  end
+else
+  function sleep(s)
+    ffi.C.poll(nil, 0, s*1000)
+  end
+end
+
+for i=1,160 do
+  io.write("."); io.flush()
+  sleep(0.01)
+end
+io.write("\n")

+ 37 - 0
luajit.mod/examples/ffi_library.bmx

@@ -0,0 +1,37 @@
+SuperStrict
+
+Framework zeke.luajit
+
+Import brl.standardio
+
+Global l:Byte Ptr = luaL_newstate()
+
+luaL_openlibs(l)
+
+Function LUA(file:String)
+	If luaL_dofile(l , file) Then
+		Print lua_tostring(l , - 1)
+		End
+	EndIf
+End Function
+
+Print "FFI:"
+LUA("ffi.lua")
+
+Print "~n~nMetaMethods:"
+LUA("metamethods.lua")
+
+Print "~n~nImage pixels:"
+LUA("image.lua")
+
+Print "~n~nImage pixels plain LUA:"
+LUA("image_plain_lua.lua")
+
+Print "~nSCIMARK:"
+LUA("scimark.lua")
+
+Print "~n~nMessagebox:"
+LUA("messagebox.lua")
+
+
+'WaitKey

+ 30 - 0
luajit.mod/examples/image.lua

@@ -0,0 +1,30 @@
+local ffi = require("ffi")
+ffi.cdef[[
+typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;
+]]
+
+local function image_ramp_green(n)
+  local img = ffi.new("rgba_pixel[?]", n)
+  local f = 255/(n-1)
+  for i=0,n-1 do
+    img[i].green = i*f
+    img[i].alpha = 255
+  end
+  return img
+end
+
+local function image_to_grey(img, n)
+  for i=0,n-1 do
+    local y = 0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue
+    img[i].red = y; img[i].green = y; img[i].blue = y
+  end
+end
+
+local N = 1024*1024
+local img = image_ramp_green(N)
+local t=os.time()
+for i=1,1000 do
+  image_to_grey(img, N)
+end
+t=os.time()-t
+print ("Time="..t)

+ 27 - 0
luajit.mod/examples/image_plain_lua.lua

@@ -0,0 +1,27 @@
+local floor = math.floor
+
+local function image_ramp_green(n)
+  local img = {}
+  local f = 255/(n-1)
+  for i=1,n do
+    img[i] = { red = 0, green = floor((i-1)*f), blue = 0, alpha = 255 }
+  end
+  return img
+end
+
+local function image_to_grey(img, n)
+  for i=1,n do
+    local y = floor(0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue)
+    img[i].red = y; img[i].green = y; img[i].blue = y
+  end
+end
+
+local N = 400*400
+local img = image_ramp_green(N)
+local t=os.time()
+for i=1,1000 do
+  image_to_grey(img, N)
+end
+
+t=os.time()-t
+print ("Time="..t)

+ 14 - 0
luajit.mod/examples/mandelbrot.bmx

@@ -0,0 +1,14 @@
+SuperStrict
+
+Framework zeke.luajit
+
+Import brl.standardio
+
+Global l:Byte Ptr = luaL_newstate()
+
+luaL_openlibs(l)
+
+
+If luaL_dofile(l , "mandelbrot.lua") Then 
+	Print lua_tostring(l , - 1)
+EndIf

+ 74 - 0
luajit.mod/examples/mandelbrot.lua

@@ -0,0 +1,74 @@
+--jit.off()
+
+local BAILOUT = 16
+local MAX_ITERATIONS = 100000
+
+function iterate(x,y)
+
+	local cr = y-0.8
+	local ci = x
+	local zi = 0.0
+	local zr = 0.0
+	local i = 0
+	
+	while 1 do
+		i = i+1
+		local temp = zr * zi
+		local zr2 = zr*zr
+		local zi2 = zi*zi
+		zr = zr2-zi2+cr
+		zi = temp+temp+ci
+		if (zi2+zr2 > BAILOUT) then
+			return i
+		end
+		
+		if (i > MAX_ITERATIONS) then
+			return 0
+		end
+	end
+
+end
+
+function mandelbrot()
+	local t = os.time()
+	local s=""
+	for y = -39, 38 do
+		for x = -39, 38 do
+			if (iterate(x/40.0, y/40) == 0) then
+				--io.write("*")
+				s=s.."*"
+			else
+				--io.write(" ")
+				s=s.." "
+			end
+		end
+		--io.write(s.."\n")
+		s=s.."\n"
+	end
+	t=os.time()-t
+	print(s)
+	io.write(string.format("Time Elapsed %d\n", t))
+end
+
+jit.opt.start(2)
+
+jit.on()
+if jit.status()==true then
+	print "JIT ON"
+else
+	print "JIT OFF"
+end
+mandelbrot()
+jit.off()
+if jit.status()==true then
+	print "JIT ON"
+else
+	print "JIT OFF"
+end
+mandelbrot()
+print("LuaJIT version: "..jit.version)
+
+
+
+
+

+ 5 - 0
luajit.mod/examples/messagebox.lua

@@ -0,0 +1,5 @@
+local ffi = require("ffi")
+ffi.cdef[[
+int MessageBoxA(void *w, const char *txt, const char *cap, int type);
+]]
+ffi.C.MessageBoxA(nil, "Hello world!", "Test", 0)

+ 1 - 0
luajit.mod/examples/metamethods.lua

@@ -0,0 +1 @@
+local ffi = require("ffi")
ffi.cdef[[
typedef struct { double x, y; } point_t;
]]

local point
local mt = {
  __add = function(a, b) return point(a.x+b.x, a.y+b.y) end,
  __len = function(a) return math.sqrt(a.x*a.x + a.y*a.y) end,
  __index = {
    area = function(a) return a.x*a.x + a.y*a.y end,
  },
}
point = ffi.metatype("point_t", mt)

local a = point(3, 4)
print(a.x, a.y)  --> 3  4
print(#a)        --> 5
print(a:area())  --> 25
local b = a + point(0.5, 8)
print(#b)        --> 12.5

+ 422 - 0
luajit.mod/examples/scimark.lua

@@ -0,0 +1,422 @@
+------------------------------------------------------------------------------
+-- Lua SciMark (2010-12-20).
+--
+-- A literal translation of SciMark 2.0a, written in Java and C.
+-- Credits go to the original authors Roldan Pozo and Bruce Miller.
+-- See: http://math.nist.gov/scimark2/
+------------------------------------------------------------------------------
+-- Copyright (C) 2006-2010 Mike Pall. All rights reserved.
+--
+-- Permission is hereby granted, free of charge, to any person obtaining
+-- a copy of this software and associated documentation files (the
+-- "Software"), to deal in the Software without restriction, including
+-- without limitation the rights to use, copy, modify, merge, publish,
+-- distribute, sublicense, and/or sell copies of the Software, and to
+-- permit persons to whom the Software is furnished to do so, subject to
+-- the following conditions:
+--
+-- The above copyright notice and this permission notice shall be
+-- included in all copies or substantial portions of the Software.
+--
+-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+-- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+-- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+-- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+--
+-- [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+------------------------------------------------------------------------------
+
+local SCIMARK_VERSION = "2010-12-10"
+local SCIMARK_COPYRIGHT = "Copyright (C) 2006-2010 Mike Pall"
+
+local MIN_TIME = 2.0
+local RANDOM_SEED = 101009 -- Must be odd.
+local SIZE_SELECT = "small"
+
+local benchmarks = {
+  "FFT", "SOR", "MC", "SPARSE", "LU",
+  small = {
+    FFT		= { 1024 },
+    SOR		= { 100 },
+    MC		= { },
+    SPARSE	= { 1000, 5000 },
+    LU		= { 100 },
+  },
+  large = {
+    FFT		= { 1048576 },
+    SOR		= { 1000 },
+    MC		= { },
+    SPARSE	= { 100000, 1000000 },
+    LU		= { 1000 },
+  },
+}
+
+local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor
+local pi, clock = math.pi, os.clock
+local format = string.format
+
+------------------------------------------------------------------------------
+-- Select array type: Lua tables or native (FFI) arrays
+------------------------------------------------------------------------------
+
+local darray, iarray
+
+local function array_init()
+  if jit and jit.status and jit.status() then
+    local ok, ffi = pcall(require, "ffi")
+    if ok then
+      darray = ffi.typeof("double[?]")
+      iarray = ffi.typeof("int[?]")
+      return
+    end
+  end
+  function darray(n) return {} end
+  iarray = darray
+end
+
+------------------------------------------------------------------------------
+-- This is a Lagged Fibonacci Pseudo-random Number Generator with
+-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark.
+------------------------------------------------------------------------------
+
+local rand, rand_init
+
+if jit and jit.status and jit.status() then
+  -- LJ2 has bit operations and zero-based arrays (internally).
+  local bit = require("bit")
+  local band, sar = bit.band, bit.arshift
+  function rand_init(seed)
+    local Rm, Rj, Ri = iarray(17), 16, 11
+    for i=0,16 do Rm[i] = 0 end
+    for i=16,0,-1 do
+      seed = band(seed*9069, 0x7fffffff)
+      Rm[i] = seed
+    end
+    function rand()
+      local i = band(Ri+1, sar(Ri-16, 31))
+      local j = band(Rj+1, sar(Rj-16, 31))
+      Ri, Rj = i, j
+      local k = band(Rm[i] - Rm[j], 0x7fffffff)
+      Rm[j] = k
+      return k * (1.0/2147483647.0)
+    end
+  end
+else
+  -- Better for standard Lua with one-based arrays and without bit operations.
+  function rand_init(seed)
+    local Rm, Rj = {}, 1
+    for i=1,17 do Rm[i] = 0 end
+    for i=17,1,-1 do
+      seed = (seed*9069) % (2^31)
+      Rm[i] = seed
+    end
+    function rand()
+      local j, m = Rj, Rm
+      local h = j - 5
+      if h < 1 then h = h + 17 end
+      local k = m[h] - m[j]
+      if k < 0 then k = k + 2147483647 end
+      m[j] = k
+      if j < 17 then Rj = j + 1 else Rj = 1 end
+      return k * (1.0/2147483647.0)
+    end
+  end
+end
+
+local function random_vector(n)
+  local v = darray(n+1)
+  for x=1,n do v[x] = rand() end
+  return v
+end
+
+local function random_matrix(m, n)
+  local a = {}
+  for y=1,m do
+    local v = darray(n+1)
+    a[y] = v
+    for x=1,n do v[x] = rand() end
+  end
+  return a
+end
+
+------------------------------------------------------------------------------
+-- FFT: Fast Fourier Transform.
+------------------------------------------------------------------------------
+
+local function fft_bitreverse(v, n)
+  local j = 0
+  for i=0,2*n-4,2 do
+    if i < j then
+      v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2]
+    end
+    local k = n
+    while k <= j do j = j - k; k = k / 2 end
+    j = j + k
+  end
+end
+
+local function fft_transform(v, n, dir)
+  if n <= 1 then return end
+  fft_bitreverse(v, n)
+  local dual = 1
+  repeat
+    local dual2 = 2*dual
+    for i=1,2*n-1,2*dual2 do
+      local j = i+dual2
+      local ir, ii = v[i], v[i+1]
+      local jr, ji = v[j], v[j+1]
+      v[j], v[j+1] = ir - jr, ii - ji
+      v[i], v[i+1] = ir + jr, ii + ji
+    end
+    local theta = dir * pi / dual
+    local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2
+    local wr, wi = 1.0, 0.0
+    for a=3,dual2-1,2 do
+      wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi
+      for i=a,a+2*(n-dual2),2*dual2 do
+	local j = i+dual2
+	local jr, ji = v[j], v[j+1]
+	local dr, di = wr*jr - wi*ji, wr*ji + wi*jr
+	local ir, ii = v[i], v[i+1]
+	v[j], v[j+1] = ir - dr, ii - di
+	v[i], v[i+1] = ir + dr, ii + di
+      end
+    end
+    dual = dual2
+  until dual >= n
+end
+
+function benchmarks.FFT(n)
+  local l2n = log(n)/log(2)
+  if l2n % 1 ~= 0 then
+    io.stderr:write("Error: FFT data length is not a power of 2\n")
+    os.exit(1)
+  end
+  local v = random_vector(n*2)
+  return function(cycles)
+    local norm = 1.0 / n
+    for p=1,cycles do
+      fft_transform(v, n, -1)
+      fft_transform(v, n, 1)
+      for i=1,n*2 do v[i] = v[i] * norm end
+    end
+    return ((5*n-2)*l2n + 2*(n+1)) * cycles
+  end
+end
+
+------------------------------------------------------------------------------
+-- SOR: Jacobi Successive Over-Relaxation.
+------------------------------------------------------------------------------
+
+local function sor_run(mat, m, n, cycles, omega)
+  local om4, om1 = omega*0.25, 1.0-omega
+  m = m - 1
+  n = n - 1
+  for i=1,cycles do
+    for y=2,m do
+      local v, vp, vn = mat[y], mat[y-1], mat[y+1]
+      for x=2,n do
+	v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x]
+      end
+    end
+  end
+end
+
+function benchmarks.SOR(n)
+  local mat = random_matrix(n, n)
+  return function(cycles)
+    sor_run(mat, n, n, cycles, 1.25)
+    return (n-1)*(n-1)*cycles*6
+  end
+end
+
+------------------------------------------------------------------------------
+-- MC: Monte Carlo Integration.
+------------------------------------------------------------------------------
+
+local function mc_integrate(cycles)
+  local under_curve = 0
+  local rand = rand
+  for i=1,cycles do
+    local x = rand()
+    local y = rand()
+    if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end
+  end
+  return (under_curve/cycles) * 4
+end
+
+function benchmarks.MC()
+  return function(cycles)
+    local res = mc_integrate(cycles)
+    assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result")
+    return cycles * 4 -- Way off, but same as SciMark in C/Java.
+  end
+end
+
+------------------------------------------------------------------------------
+-- Sparse Matrix Multiplication.
+------------------------------------------------------------------------------
+
+local function sparse_mult(n, cycles, vy, val, row, col, vx)
+  for p=1,cycles do
+    for r=1,n do
+      local sum = 0
+      for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end
+      vy[r] = sum
+    end
+  end
+end
+
+function benchmarks.SPARSE(n, nz)
+  local nr = floor(nz/n)
+  local anz = nr*n
+  local vx = random_vector(n)
+  local val = random_vector(anz)
+  local vy, col, row = darray(n+1), iarray(nz+1), iarray(n+2)
+  row[1] = 1
+  for r=1,n do
+    local step = floor(r/nr)
+    if step < 1 then step = 1 end
+    local rr = row[r]
+    row[r+1] = rr+nr
+    for i=0,nr-1 do col[rr+i] = 1+i*step end
+  end
+  return function(cycles)
+    sparse_mult(n, cycles, vy, val, row, col, vx)
+    return anz*cycles*2
+  end
+end
+
+------------------------------------------------------------------------------
+-- LU: Dense Matrix Factorization.
+------------------------------------------------------------------------------
+
+local function lu_factor(a, pivot, m, n)
+  local min_m_n = m < n and m or n
+  for j=1,min_m_n do
+    local jp, t = j, abs(a[j][j])
+    for i=j+1,m do
+      local ab = abs(a[i][j])
+      if ab > t then
+	jp = i
+	t = ab
+      end
+    end
+    pivot[j] = jp
+    if a[jp][j] == 0 then error("zero pivot") end
+    if jp ~= j then a[j], a[jp] = a[jp], a[j] end
+    if j < m then
+      local recp = 1.0 / a[j][j]
+      for k=j+1,m do
+	local v = a[k]
+	v[j] = v[j] * recp
+      end
+    end
+    if j < min_m_n then
+      for i=j+1,m do
+	local vi, vj = a[i], a[j]
+	local eij = vi[j]
+	for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end
+      end
+    end
+  end
+end
+
+local function matrix_alloc(m, n)
+  local a = {}
+  for y=1,m do a[y] = darray(n+1) end
+  return a
+end
+
+local function matrix_copy(dst, src, m, n)
+  for y=1,m do
+    local vd, vs = dst[y], src[y]
+    for x=1,n do vd[x] = vs[x] end
+  end
+end
+
+function benchmarks.LU(n)
+  local mat = random_matrix(n, n)
+  local tmp = matrix_alloc(n, n)
+  local pivot = iarray(n+1)
+  return function(cycles)
+    for i=1,cycles do
+      matrix_copy(tmp, mat, n, n)
+      lu_factor(tmp, pivot, n, n)
+    end
+    return 2.0/3.0*n*n*n*cycles
+  end
+end
+
+------------------------------------------------------------------------------
+-- Main program.
+------------------------------------------------------------------------------
+
+local function printf(...)
+  io.write(format(...))
+end
+
+local function fmtparams(p1, p2)
+  if p2 then return format("[%d, %d]", p1, p2)
+  elseif p1 then return format("[%d]", p1) end
+  return ""
+end
+
+local function measure(min_time, name, ...)
+  array_init()
+  rand_init(RANDOM_SEED)
+  local run = benchmarks[name](...)
+  local cycles = 1
+  repeat
+    local tm = clock()
+    local flops = run(cycles, ...)
+    tm = clock() - tm
+    if tm >= min_time then
+      local res = flops / tm * 1.0e-6
+      local p1, p2 = ...
+      printf("%-7s %8.2f  %s\n", name, res, fmtparams(...))
+      return res
+    end
+    cycles = cycles * 2
+  until false
+end
+
+printf("Lua SciMark %s based on SciMark 2.0a. %s.\n\n",
+       SCIMARK_VERSION, SCIMARK_COPYRIGHT)
+
+while arg and arg[1] do
+  local a = table.remove(arg, 1)
+  if a == "-noffi" then
+    package.preload.ffi = nil
+  elseif a == "-small" then
+    SIZE_SELECT = "small"
+  elseif a == "-large" then
+    SIZE_SELECT = "large"
+  elseif benchmarks[a] then
+    local p = benchmarks[SIZE_SELECT][a]
+    measure(MIN_TIME, a, tonumber(arg[1]) or p[1], tonumber(arg[2]) or p[2])
+    return
+  else
+    printf("Usage: scimark [-noffi] [-small|-large] [BENCH params...]\n\n")
+    printf("BENCH   -small         -large\n")
+    printf("---------------------------------------\n")
+    for _,name in ipairs(benchmarks) do
+      printf("%-7s %-13s %s\n", name,
+	     fmtparams(unpack(benchmarks.small[name])),
+	     fmtparams(unpack(benchmarks.large[name])))
+    end
+    printf("\n")
+    os.exit(1)
+  end
+end
+
+local params = benchmarks[SIZE_SELECT]
+local sum = 0
+for _,name in ipairs(benchmarks) do
+  sum = sum + measure(MIN_TIME, name, unpack(params[name]))
+end
+printf("\nSciMark %8.2f  [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT)
+io.flush()

+ 17 - 0
luajit.mod/examples/sieve.bmx

@@ -0,0 +1,17 @@
+SuperStrict
+
+' Compare speed by uncommenting/commenting different frameworks
+'Framework pub.lua
+Framework zeke.luajit
+
+Import brl.standardio
+
+Global l:Byte Ptr = luaL_newstate()
+
+luaL_openlibs(l)
+
+If luaL_dofile(l , "sieve.lua") Then 
+	Print lua_tostring(l , - 1)
+EndIf
+Print "done."
+

+ 38 - 0
luajit.mod/examples/sieve.lua

@@ -0,0 +1,38 @@
+--jit.off()
+
+function sieve(n)
+  x = {}
+  iter = 0
+  repeat
+    x[1] = 0
+    i = 2
+    repeat
+      x[i] = 1
+      i = i + 1
+    until i > n
+    p = 2
+    while(p * p <= n) do
+      j = p
+      while(j <= n) do
+        x[j] = 0
+        j = j + p
+      end
+      repeat
+        p = p + 1
+      until x[p] == 1
+    end
+    iter = iter + 1
+  until iter == 101
+end
+
+print("Sieve of Eratosthenes - Lua Benchmark test 0.00001")
+print("Start testing .....")
+start = os.clock()
+sieve(100000)
+stop = os.clock()
+print("Done!")
+print("Total Time = "..(stop - start))
+--Pub.lua time=13.218
+--LuaJIT  time=4.933
+--LuaJIT2 time=0.397
+

+ 51 - 0
luajit.mod/glue.c

@@ -0,0 +1,51 @@
+#include "luajit/src/lua.h"
+#include "brl.mod/blitz.mod/blitz.h"
+
+lua_Debug * bmx_luajit_lua_Debug_new() {
+	return (lua_Debug*)malloc(sizeof(lua_Debug));
+}
+
+void bmx_luajit_lua_Debug_free(lua_Debug * dbg) {
+	free(dbg);
+}
+
+int bmx_luajit_lua_Debug_event(lua_Debug * dbg) {
+	return dbg->event;
+}
+
+BBString * bmx_luajit_lua_Debug_name(lua_Debug * dbg) {
+	return bbStringFromCString(dbg->name);
+}
+
+BBString * bmx_luajit_lua_Debug_namewhat(lua_Debug * dbg) {
+	return bbStringFromCString(dbg->namewhat);
+}
+
+BBString * bmx_luajit_lua_Debug_what(lua_Debug * dbg) {
+	return bbStringFromCString(dbg->what);
+}
+
+BBString * bmx_luajit_lua_Debug_source(lua_Debug * dbg) {
+	return bbStringFromCString(dbg->source);
+}
+
+int bmx_luajit_lua_Debug_currentline(lua_Debug * dbg) {
+	return dbg->currentline;
+}
+
+int bmx_luajit_lua_Debug_nups(lua_Debug * dbg) {
+	return dbg->nups;
+}
+
+int bmx_luajit_lua_Debug_linedefined(lua_Debug * dbg) {
+	return dbg->linedefined;
+}
+
+int bmx_luajit_lua_Debug_lastlinedefined(lua_Debug * dbg) {
+	return dbg->lastlinedefined;
+}
+
+char * bmx_luajit_lua_Debug_short_src(lua_Debug * dbg) {
+	return dbg->short_src;
+}
+

BIN
luajit.mod/lib/linux/libluajit_x64.a


BIN
luajit.mod/lib/linux/libluajit_x86.a


BIN
luajit.mod/lib/macos/libluajit_x64.a


BIN
luajit.mod/lib/macos/libluajit_x86.a


+ 1085 - 0
luajit.mod/luajit.bmx

@@ -0,0 +1,1085 @@
+SuperStrict
+
+Rem
+bbdoc: LuaJIT
+end rem
+Module zeke.luajit
+
+ModuleInfo "Version: 1.12"
+ModuleInfo "Author: Zeke"
+
+ModuleInfo "History: V1.12"
+ModuleInfo "History: Updated to LuaJIT 2.0.4"
+ModuleInfo "History: Added 64-bit support."
+ModuleInfo "History: V1.11 Release"
+ModuleInfo "History: updated to LuaJIT 2.0.1"
+ModuleInfo "History: V1.10 Release"
+ModuleInfo "History: Stable LuaJIT 2.0.0 release"
+ModuleInfo "History: V1.09 Release"
+ModuleInfo "History: updated to LuaJIT 2.0.0-rc3"
+ModuleInfo "History: V1.08 Release"
+ModuleInfo "History: Updated to LUAJIT 2.0.0-rc2"
+ModuleInfo "History: V1.07 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0.0-rc1"
+ModuleInfo "History: V1.06 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0 beta 11"
+ModuleInfo "History: V1.05 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0 beta 10"
+ModuleInfo "History: V1.04 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0 beta 9"
+ModuleInfo "History: V1.03 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0 beta 8"
+ModuleInfo "History: V1.02 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0 beta 7"
+ModuleInfo "History: v1.01 Release"
+ModuleInfo "History: Updated to LuaJIT 2.0 beta 5"
+ModuleInfo "History: 1.00 Release"
+ModuleInfo "History: First release"
+
+
+
+' linkage
+?linuxx86
+ModuleInfo "LD_OPTS: -L%PWD%/lib/linux"
+Import "-lluajit_x86"
+?linuxx64
+ModuleInfo "LD_OPTS: -L%PWD%/lib/linux"
+Import "-lluajit_x64"
+?win32x86
+ModuleInfo "LD_OPTS: -L%PWD%/lib/win32"
+?win32x64
+ModuleInfo "LD_OPTS: -L%PWD%/lib/win32"
+?macosx86
+ModuleInfo "LD_OPTS: -L%PWD%/lib/macos"
+Import "-lluajit_x86"
+?macosx64
+ModuleInfo "LD_OPTS: -L%PWD%/lib/macos"
+ModuleInfo "LD_OPTS: -pagezero_size 10000 -image_base 100000000"
+Import "-lluajit_x64"
+?ios
+ModuleInfo "LD_OPTS: -L%PWD%/lib/ios"
+?raspberrypi
+ModuleInfo "LD_OPTS: -L%PWD%/lib/linux"
+?
+
+?linux
+Import "-ldl"
+?
+
+' source
+
+Import "luajit/src/*.h"
+Import "glue.c"
+
+
+
+
+
+
+
+' ******************************************************************************
+' *                                                                            *
+' *                            Constant Definitions                            *
+' *                                                                            *
+' ******************************************************************************
+
+  Const LUA_IDSIZE:Int = 1024
+
+' **** (lua.h) some basic definitions - just to be complete ****
+
+  Const LUA_VERSION:String   = "Lua 5.1"
+  Const LUA_RELEASE:String   = "Lua 5.1.4"
+  Const LUA_VERSION_NUM:Int  = 501
+  Const LUA_COPYRIGHT:String = "Copyright (C) 1994-2008 Lua.org, PUC-Rio"
+  Const LUA_AUTHORS:String   = "R. Ierusalimschy, L. H. de Figueiredo & W. Celes"
+
+' **** (lua.h) option for multiple returns in `lua_pcall' and `lua_call' ****
+
+  Const LUA_MULTRET:Int = -1
+
+' **** (lua.h) pseudo-indices ****
+
+  Const LUA_REGISTRYINDEX:Int = -10000
+  Const LUA_ENVIRONINDEX:Int  = -10001
+  Const LUA_GLOBALSINDEX:Int  = -10002
+
+' **** (lua.h) thread status (0 is OK) ****
+
+  Const LUA_YIELD_:Int    = 1   ' added _ after LUA_YIELD because of lua_yield function
+  Const LUA_ERRRUN:Int    = 2
+  Const LUA_ERRSYNTAX:Int = 3
+  Const LUA_ERRMEM:Int    = 4
+  Const LUA_ERRERR:Int    = 5
+
+' **** (lua.h) basic types ****
+
+  Const LUA_TNONE:Int          = -1
+  Const LUA_TNIL:Int           =  0
+  Const LUA_TBOOLEAN:Int       =  1
+  Const LUA_TLIGHTUSERDATA:Int =  2
+  Const LUA_TNUMBER:Int        =  3
+  Const LUA_TSTRING:Int        =  4
+  Const LUA_TTABLE:Int         =  5
+  Const LUA_TFUNCTION:Int      =  6
+  Const LUA_TUSERDATA:Int      =  7
+  Const LUA_TTHREAD:Int        =  8
+
+' **** (lua.h) garbage-collection options ****
+
+  Const LUA_GCSTOP:Int = 0
+  Const LUA_GCRESTART:Int    = 1
+  Const LUA_GCCOLLECT:Int    = 2
+  Const LUA_GCCOUNT:Int      = 3
+  Const LUA_GCCOUNTB:Int     = 4
+  Const LUA_GCSTEP:Int       = 5
+  Const LUA_GCSETPAUSE:Int = 6
+  Const LUA_GCSETSTEPMUL:Int = 7
+
+' **** (lua.h) event codes ****
+
+  Const LUA_HOOKCALL:Int    = 0
+  Const LUA_HOOKRET:Int     = 1
+  Const LUA_HOOKLINE:Int    = 2
+  Const LUA_HOOKCOUNT:Int   = 3
+  Const LUA_HOOKTAILRET:Int = 4
+
+' **** (lua.h) event masks ****
+
+  Const LUA_MASKCALL:Int  = (1 Shl LUA_HOOKCALL)
+  Const LUA_MASKRET:Int   = (1 Shl LUA_HOOKRET)
+  Const LUA_MASKLINE:Int  = (1 Shl LUA_HOOKLINE)
+  Const LUA_MASKCOUNT:Int = (1 Shl LUA_HOOKCOUNT)
+
+	Const LUAJIT_MODE_ENGINE:Int 	 	 = 0 'Set mode for whole JIT engine.
+	Const LUAJIT_MODE_DEBUG:Int 		 	 = 1 'Set debug mode (idx = level).
+	
+	Const LUAJIT_MODE_FUNC:Int 			 = 2 'Change mode for a function.
+	Const LUAJIT_MODE_ALLFUNC:Int		 = 3 'Recurse into subroutine protos.
+	Const LUAJIT_MODE_ALLSUBFUNC:Int	 = 4 'Change only the subroutines.
+	
+	
+	Const LUAJIT_MODE_TRACE:Int 			 = 5 'Flush a compiled trace.
+	
+	Const LUAJIT_MODE_WRAPFUNC:Int 		 = $10 'Set wrapper mode for C function calls.
+	
+	Const LUAJIT_MODE_MAX:Int 			 = $11
+	
+
+	Const LUAJIT_MODE_OFF:Int		 = 0
+	Const LUAJIT_MODE_ON:Int		 = $100
+	Const LUAJIT_MODE_FLUSH:Int	 = $200
+	
+	
+
+' ******************************************************************************
+' *                                                                            *
+' *                          The Lua API (Functions)                           *
+' *                                                                            *
+' ******************************************************************************
+
+Extern
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_atpanic">Lua Reference Manual</a>
+end rem
+  Function lua_atpanic:Byte Ptr (lua_state:Byte Ptr, panicf:Int(ls:Byte Ptr))
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_call">Lua Reference Manual</a>
+end rem
+  Function lua_call (lua_state:Byte Ptr, nargs:Int, nresults:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_checkstack">Lua Reference Manual</a>
+end rem
+  Function lua_checkstack:Int (lua_state:Byte Ptr, extra:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_close">Lua Reference Manual</a>
+end rem
+  Function lua_close (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_concat">Lua Reference Manual</a>
+end rem
+  Function lua_concat (lua_state:Byte Ptr, n:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_cpcall">Lua Reference Manual</a>
+end rem
+  Function lua_cpcall:Int (lua_state:Byte Ptr, func:Int(ls:Byte Ptr), ud:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_createtable">Lua Reference Manual</a>
+end rem
+  Function lua_createtable (lua_state:Byte Ptr, narr:Int, nrec:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_dump">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_dump:Int (lua_state:Byte Ptr, writer:Int(ls:Byte Ptr,p:Byte Ptr,sz:Int,ud:Byte Ptr), data:Byte Ptr)
+?ptr64
+  Function lua_dump:Int (lua_state:Byte Ptr, writer:Int(ls:Byte Ptr,p:Byte Ptr,sz:Long,ud:Byte Ptr), data:Byte Ptr)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_equal">Lua Reference Manual</a>
+end rem
+  Function lua_equal:Int (lua_state:Byte Ptr, index1:Int, index2:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_error">Lua Reference Manual</a>
+end rem
+  Function lua_error:Int (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_gc">Lua Reference Manual</a>
+end rem
+  Function lua_gc:Int (lua_state:Byte Ptr, what:Int, data:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getallocf">Lua Reference Manual</a>
+end rem
+  Function lua_getallocf:Byte Ptr (lua_state:Byte Ptr, ud:Byte Ptr Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getfenv">Lua Reference Manual</a>
+end rem
+  Function lua_getfenv (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getfield">Lua Reference Manual</a>
+end rem
+  Function lua_getfield (lua_state:Byte Ptr, index:Int, k$z)              ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_gethook">Lua Reference Manual</a>
+end rem
+  Function lua_gethook:Byte Ptr (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_gethookcount">Lua Reference Manual</a>
+end rem
+  Function lua_gethookcount:Int (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_gethookmask">Lua Reference Manual</a>
+end rem
+  Function lua_gethookmask:Int (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getmetatable">Lua Reference Manual</a>
+end rem
+  Function lua_getmetatable:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_gettable">Lua Reference Manual</a>
+end rem
+  Function lua_gettable (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_gettop">Lua Reference Manual</a>
+end rem
+  Function lua_gettop:Int (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getupvalue">Lua Reference Manual</a>
+end rem
+  Function lua_getupvalue$z (lua_state:Byte Ptr, funcindex:Int, n:Int)        ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_insert">Lua Reference Manual</a>
+end rem
+  Function lua_insert (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_iscfunction">Lua Reference Manual</a>
+end rem
+  Function lua_iscfunction:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isnumber">Lua Reference Manual</a>
+end rem
+  Function lua_isnumber:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isstring">Lua Reference Manual</a>
+end rem
+  Function lua_isstring:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isuserdata">Lua Reference Manual</a>
+end rem
+  Function lua_isuserdata:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_lessthan">Lua Reference Manual</a>
+end rem
+  Function lua_lessthan:Int (lua_state:Byte Ptr, index1:Int, index2:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_load">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_load:Int (lua_state:Byte Ptr, reader:Byte Ptr(ls:Byte Ptr,data:Byte Ptr,sz:Int Ptr), data:Byte Ptr, chunkname$z) ' no ~0 expected
+?ptr64
+  Function lua_load:Int (lua_state:Byte Ptr, reader:Byte Ptr(ls:Byte Ptr,data:Byte Ptr,sz:Long Ptr), data:Byte Ptr, chunkname$z) ' no ~0 expected
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_newstate">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_newstate:Byte Ptr (f:Byte Ptr(ud:Byte Ptr, p:Byte Ptr, osize:Int, nsize:Int), ud:Byte Ptr)
+?ptr64
+  Function lua_newstate:Byte Ptr (f:Byte Ptr(ud:Byte Ptr, p:Byte Ptr, osize:Long, nsize:Long), ud:Byte Ptr)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_newthread">Lua Reference Manual</a>
+end rem
+  Function lua_newthread:Byte Ptr (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_newuserdata">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_newuserdata:Byte Ptr (lua_state:Byte Ptr, size:Int)
+?ptr64
+  Function lua_newuserdata:Byte Ptr (lua_state:Byte Ptr, size:Long)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_next">Lua Reference Manual</a>
+end rem
+  Function lua_next:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_objlen">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_objlen:Int (lua_state:Byte Ptr, index:Int)
+?ptr64
+  Function lua_objlen:Long (lua_state:Byte Ptr, index:Int)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pcall">Lua Reference Manual</a>
+end rem
+  Function lua_pcall:Int (lua_state:Byte Ptr, nargs:Int, nresults:Int, errfunc:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushboolean">Lua Reference Manual</a>
+end rem
+  Function lua_pushboolean (lua_state:Byte Ptr, b:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushcclosure">Lua Reference Manual</a>
+end rem
+  Function lua_pushcclosure (lua_state:Byte Ptr, fn:Int(ls:Byte Ptr), n:Int)
+' function lua_pushfstring$z (lua_state:byte ptr, fmt$z, ...)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushinteger">Lua Reference Manual</a>
+end rem
+  Function lua_pushinteger (lua_state:Byte Ptr, n:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushlightuserdata">Lua Reference Manual</a>
+end rem
+  Function lua_pushlightuserdata (lua_state:Byte Ptr, p:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushlstring">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_pushlstring (lua_state:Byte Ptr, s:Byte Ptr, size:Int)    ' without any conversion!
+?ptr64
+  Function lua_pushlstring (lua_state:Byte Ptr, s:Byte Ptr, size:Long)    ' without any conversion!
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushnil">Lua Reference Manual</a>
+end rem
+  Function lua_pushnil (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushnumber">Lua Reference Manual</a>
+end rem
+  Function lua_pushnumber (lua_state:Byte Ptr, n:Double)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushstring">Lua Reference Manual</a>
+end rem
+  Function lua_pushstring (lua_state:Byte Ptr, s$z)                         ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushthread">Lua Reference Manual</a>
+end rem
+  Function lua_pushthread:Int (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushvalue">Lua Reference Manual</a>
+end rem
+  Function lua_pushvalue (lua_state:Byte Ptr, index:Int)
+' function lua_pushvfstring$z (lua_state:byte ptr, fmt$z, argp:???)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_rawequal">Lua Reference Manual</a>
+end rem
+  Function lua_rawequal:Int (lua_state:Byte Ptr, index1:Int, index2:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_rawget">Lua Reference Manual</a>
+end rem
+  Function lua_rawget (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_rawgeti">Lua Reference Manual</a>
+end rem
+  Function lua_rawgeti (lua_state:Byte Ptr, index:Int, n:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_rawset">Lua Reference Manual</a>
+end rem
+  Function lua_rawset (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_rawseti">Lua Reference Manual</a>
+end rem
+  Function lua_rawseti (lua_state:Byte Ptr, index:Int, n:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_remove">Lua Reference Manual</a>
+end rem
+  Function lua_remove (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_replace">Lua Reference Manual</a>
+end rem
+  Function lua_replace (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_resume">Lua Reference Manual</a>
+end rem
+  Function lua_resume:Int (lua_state:Byte Ptr, narg:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setallocf">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_setallocf (lua_state:Byte Ptr, f:Byte Ptr(ud:Byte Ptr, p:Byte Ptr, osize:Int, nsize:Int), ud:Byte Ptr)
+?ptr64
+  Function lua_setallocf (lua_state:Byte Ptr, f:Byte Ptr(ud:Byte Ptr, p:Byte Ptr, osize:Long, nsize:Long), ud:Byte Ptr)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setfenv">Lua Reference Manual</a>
+end rem
+  Function lua_setfenv:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setfield">Lua Reference Manual</a>
+end rem
+  Function lua_setfield (lua_state:Byte Ptr, index:Int, k$z)              ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setmetatable">Lua Reference Manual</a>
+end rem
+  Function lua_setmetatable:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_settable">Lua Reference Manual</a>
+end rem
+  Function lua_settable (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_settop">Lua Reference Manual</a>
+end rem
+  Function lua_settop (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setupvalue">Lua Reference Manual</a>
+end rem
+  Function lua_setupvalue$z (lua_state:Byte Ptr, funcindex:Int, n:Int)        ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_status">Lua Reference Manual</a>
+end rem
+  Function lua_status:Int (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_toboolean">Lua Reference Manual</a>
+end rem
+  Function lua_toboolean:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_tocfunction">Lua Reference Manual</a>
+end rem
+  Function lua_tocfunction:Byte Ptr (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_tointeger">Lua Reference Manual</a>
+end rem
+  Function lua_tointeger:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_tolstring">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function lua_tolstring:Byte Ptr (lua_state:Byte Ptr, index:Int, size:Int Ptr) ' without any conversion!
+?ptr64
+  Function lua_tolstring:Byte Ptr (lua_state:Byte Ptr, index:Int, size:Long Ptr) ' without any conversion!
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_tonumber">Lua Reference Manual</a>
+end rem
+  Function lua_tonumber:Double (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_topointer">Lua Reference Manual</a>
+end rem
+  Function lua_topointer:Byte Ptr (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_tothread">Lua Reference Manual</a>
+end rem
+  Function lua_tothread:Byte Ptr (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_touserdata">Lua Reference Manual</a>
+end rem
+  Function lua_touserdata:Byte Ptr (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_type">Lua Reference Manual</a>
+end rem
+  Function lua_type:Int (lua_state:Byte Ptr, index:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_typename">Lua Reference Manual</a>
+end rem
+  Function lua_typename$z (lua_state:Byte Ptr, tp:Int)                      ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_xmove">Lua Reference Manual</a>
+end rem
+  Function lua_xmove                (fromState:Byte Ptr, toState:Byte Ptr, n:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_yield">Lua Reference Manual</a>
+end rem
+  Function lua_yield:Int (lua_state:Byte Ptr, nresults:Int)
+
+	'## LuaJIT functions ##
+	
+Rem
+bbdoc: see <a href="../../../../mod/zeke.mod/luajit2.mod/LuaJIT-2.0.0/doc/api.html#luaJIT_setmode">LuaJIT doc</a>
+end rem
+	Function luaJIT_setmode:Int(lua_state:Byte Ptr , idx:Int , mode:Int)
+End Extern
+
+
+' ******************************************************************************
+' *                                                                            *
+' *                            The Lua Debugger API                            *
+' *                                                                            *
+' ******************************************************************************
+
+Type lua_Debug
+
+	Field debugPtr:Byte Ptr
+
+	Method New()
+		debugPtr = bmx_luajit_lua_Debug_new()
+	End Method
+	
+	Method event:Int()
+		Return bmx_luajit_lua_Debug_event(debugPtr)
+	End Method
+	
+	Method name:String()                                         ' no ~0 expected
+		Return bmx_luajit_lua_Debug_name(debugPtr)
+	End Method
+	
+	Method namewhat:String()                                               ' dto.
+		Return bmx_luajit_lua_Debug_namewhat(debugPtr)
+	End Method
+
+	Method what:String()                                                   ' dto.
+		Return bmx_luajit_lua_Debug_what(debugPtr)
+	End Method
+
+	Method source:String()                                                 ' dto.
+		Return bmx_luajit_lua_Debug_source(debugPtr)
+	End Method
+
+	Method currentline:Int()
+		Return bmx_luajit_lua_Debug_currentline(debugPtr)
+	End Method
+
+	Method nups:Int()
+		Return bmx_luajit_lua_Debug_nups(debugPtr)
+	End Method
+
+	Method linedefined:Int()
+		Return bmx_luajit_lua_Debug_linedefined(debugPtr)
+	End Method
+
+	Method lastlinedefined:Int()
+		Return bmx_luajit_lua_Debug_lastlinedefined(debugPtr)
+	End Method
+
+	Method short_src:Byte Ptr()
+		Return bmx_luajit_lua_Debug_short_src(debugPtr)
+	End Method
+	
+	Method Delete()
+		If debugPtr Then
+			bmx_luajit_lua_Debug_free(debugPtr)
+			debugPtr = Null
+		End If
+	End Method
+End Type
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getinfo">Lua Reference Manual</a>
+End Rem
+Function lua_getinfo:Int (lua_state:Byte Ptr, what:String, ar:lua_Debug)    ' no ~0 expected
+End Function
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getlocal">Lua Reference Manual</a>
+End Rem
+Function lua_getlocal:String(lua_state:Byte Ptr, ar:lua_Debug Var, n:Int)     ' no ~0 expected
+End Function
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getstack">Lua Reference Manual</a>
+End Rem
+Function lua_getstack:Int (lua_state:Byte Ptr, level:Int, ar:lua_Debug) ' no ~0 expected
+End Function
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_sethook">Lua Reference Manual</a>
+End Rem
+Function lua_sethook:Int (lua_state:Byte Ptr, f(ls:Byte Ptr,ar:lua_Debug), mask:Int, count:Int)
+End Function
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setlocal">Lua Reference Manual</a>
+End Rem
+Function lua_setlocal:String(lua_state:Byte Ptr, ar:lua_Debug, n:Int)     ' no ~0 expected
+End Function
+
+' ******************************************************************************
+' *                                                                            *
+' *                            The Lua API (Macros)                            *
+' *                                                                            *
+' ******************************************************************************
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_getglobal">Lua Reference Manual</a>
+end rem
+  Function lua_getglobal (lua_state:Byte Ptr, name:String)
+    lua_getfield(lua_state, LUA_GLOBALSINDEX, name)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isboolean">Lua Reference Manual</a>
+end rem
+  Function lua_isboolean:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TBOOLEAN)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isfunction">Lua Reference Manual</a>
+end rem
+  Function lua_isfunction:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TFUNCTION)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_islightuserdata">Lua Reference Manual</a>
+end rem
+  Function lua_islightuserdata:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TLIGHTUSERDATA)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isnil">Lua Reference Manual</a>
+end rem
+  Function lua_isnil:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TNIL)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isnone">Lua Reference Manual</a>
+end rem
+  Function lua_isnone:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TNONE)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isnoneornil">Lua Reference Manual</a>
+end rem
+  Function lua_isnoneornil:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) <= 0)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_istable">Lua Reference Manual</a>
+end rem
+  Function lua_istable:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TTABLE)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_isthread">Lua Reference Manual</a>
+end rem
+  Function lua_isthread:Int (lua_state:Byte Ptr, index:Int)
+    Return (lua_type(lua_state,index) = LUA_TTHREAD)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_newtable">Lua Reference Manual</a>
+end rem
+  Function lua_newtable (lua_state:Byte Ptr)
+    lua_createtable(lua_state,0,0)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pop">Lua Reference Manual</a>
+end rem
+  Function lua_pop (lua_state:Byte Ptr, n:Int)
+    lua_settop(lua_state,-(n)-1)
+  End Function
+
+  Function lua_pushbytearray (lua_state:Byte Ptr, Buffer:Byte[])
+    lua_pushlstring(lua_state, Varptr Buffer[0], Buffer.length)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_pushcfunction">Lua Reference Manual</a>
+end rem
+  Function lua_pushcfunction (lua_state:Byte Ptr, fn:Int(ls:Byte Ptr))
+    lua_pushcclosure(lua_state, fn, 0)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_register">Lua Reference Manual</a>
+end rem
+  Function lua_register (lua_state:Byte Ptr, name:String, f:Int(ls:Byte Ptr))
+    lua_pushcfunction(lua_state, f)
+    lua_setglobal    (lua_state, name)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_setglobal">Lua Reference Manual</a>
+end rem
+  Function lua_setglobal (lua_state:Byte Ptr, name:String)
+    lua_setfield(lua_state, LUA_GLOBALSINDEX, name)
+  End Function
+
+  Function lua_tobytearray:Byte[] (lua_state:Byte Ptr, index:Int)
+?ptr64
+    Local Length:Long
+?Not ptr64
+   	Local Length:Int
+?
+    Local DataPtr:Byte Ptr = lua_tolstring(lua_state, index, Varptr Length)
+    If DataPtr = Null Then
+      Return Null
+    Else
+      Local Result:Byte[] = New Byte[Length]
+        MemCopy(Varptr Result[0], DataPtr, Length);
+      Return Result
+    End If
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#lua_tostring">Lua Reference Manual</a>
+end rem
+  Function lua_tostring:String (lua_state:Byte Ptr, index:Int)
+	If lua_type(lua_state,index) = LUA_TSTRING Then
+?ptr64
+	    Local Length:Long
+?Not ptr64
+    	Local Length:Int
+?
+	    Local StringPtr:Byte Ptr = lua_tolstring(lua_state, index, Varptr Length)
+    	If StringPtr = Null Then
+	      Return Null
+	    Else
+    	  Return String.fromBytes(StringPtr,Length)
+	    End If
+	Else
+		Return Null
+	End If
+  End Function
+
+' ******************************************************************************
+' *                                                                            *
+' *                     The Auxiliary Library (Functions)                      *
+' *                                                                            *
+' ******************************************************************************
+
+Extern
+  Type lua_Reg
+    Field name:Byte Ptr                                         ' no ~0 expected
+    Field func:Int(ls:Byte Ptr)
+  End Type
+End Extern
+
+Extern
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_addlstring">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function luaL_addlstring (B:Byte Ptr, s:Byte Ptr, l:Int)
+?ptr64
+  Function luaL_addlstring (B:Byte Ptr, s:Byte Ptr, l:Long)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_addsize">Lua Reference Manual</a>
+end rem
+  Function luaL_addsize (B:Byte Ptr, size:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_addstring">Lua Reference Manual</a>
+end rem
+  Function luaL_addstring (B:Byte Ptr, s$z)                                 ' no ~0 allowed!
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_addvalue">Lua Reference Manual</a>
+end rem
+  Function luaL_addvalue (B:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_argerror">Lua Reference Manual</a>
+end rem
+  Function luaL_argerror:Int (lua_state:Byte Ptr, narg:Int, extramsg$z)     ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_buffinit">Lua Reference Manual</a>
+end rem
+  Function luaL_buffinit (lua_state:Byte Ptr, B:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_callmeta">Lua Reference Manual</a>
+end rem
+  Function luaL_callmeta:Int (lua_state:Byte Ptr, obj:Int, e$z)             ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checkany">Lua Reference Manual</a>
+end rem
+  Function luaL_checkany (lua_state:Byte Ptr, narg:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checkinteger">Lua Reference Manual</a>
+end rem
+  Function luaL_checkinteger:Int (lua_state:Byte Ptr, narg:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checklstring">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function luaL_checklstring:Byte Ptr (lua_state:Byte Ptr, narg:Int, size:Int Ptr)
+?ptr64
+  Function luaL_checklstring:Byte Ptr (lua_state:Byte Ptr, narg:Int, size:Long Ptr)
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checknumber">Lua Reference Manual</a>
+end rem
+  Function luaL_checknumber:Double (lua_state:Byte Ptr, narg:Int)
+' function luaL_checkoption:int (lua_state:byte ptr, narg:int, def$z, lst$z[])
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checkstack">Lua Reference Manual</a>
+end rem
+  Function luaL_checkstack (lua_state:Byte Ptr, sz:Int, msg$z)                     ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checktype">Lua Reference Manual</a>
+end rem
+  Function luaL_checktype (lua_state:Byte Ptr, narg:Int, t:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checkudata">Lua Reference Manual</a>
+end rem
+  Function luaL_checkudata:Byte Ptr (lua_state:Byte Ptr, narg:Int, tname$z)        ' no ~0 expected
+' function luaL_error:int (lua_state:byte ptr, fmt$z, ...)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_getmetafield">Lua Reference Manual</a>
+end rem
+  Function luaL_getmetafield:Int (lua_state:Byte Ptr, obj:Int, e$z)                ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_gsub">Lua Reference Manual</a>
+end rem
+  Function luaL_gsub$z (lua_state:Byte Ptr, s$z, p$z, r$z)                         ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_loadbuffer">Lua Reference Manual</a>
+end rem
+  Function luaL_loadbuffer:Int (lua_state:Byte Ptr, buff:Byte Ptr, sz:Int, name$z) ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_loadfile">Lua Reference Manual</a>
+end rem
+  Function luaL_loadfile:Int (lua_state:Byte Ptr, filename$z)                      ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_loadstring">Lua Reference Manual</a>
+end rem
+  Function luaL_loadstring:Int (lua_state:Byte Ptr, s$z)                           ' no ~0 allowed!
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_newmetatable">Lua Reference Manual</a>
+end rem
+  Function luaL_newmetatable:Int (lua_state:Byte Ptr, tname$z)                     ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_newstate">Lua Reference Manual</a>
+end rem
+  Function luaL_newstate:Byte Ptr ()
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_openlibs">Lua Reference Manual</a>
+end rem
+  Function luaL_openlibs (lua_state:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_optinteger">Lua Reference Manual</a>
+end rem
+  Function luaL_optinteger:Int (lua_state:Byte Ptr, narg:Int, d:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_optlstring">Lua Reference Manual</a>
+end rem
+?Not ptr64
+  Function luaL_optlstring:Byte Ptr (lua_state:Byte Ptr, narg:Int, d$z, size:Int Ptr) ' no ~0 expected in "d"
+?ptr64
+  Function luaL_optlstring:Byte Ptr (lua_state:Byte Ptr, narg:Int, d$z, size:Long Ptr) ' no ~0 expected in "d"
+?
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_optnumber">Lua Reference Manual</a>
+end rem
+  Function luaL_optnumber:Double (lua_state:Byte Ptr, narg:Int, d:Double)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_prepbuffer">Lua Reference Manual</a>
+end rem
+  Function luaL_prepbuffer:Byte Ptr (B:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_pushresult">Lua Reference Manual</a>
+end rem
+  Function luaL_pushresult (B:Byte Ptr)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_ref">Lua Reference Manual</a>
+end rem
+  Function luaL_ref:Int (lua_state:Byte Ptr, t:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_register">Lua Reference Manual</a>
+end rem
+  Function luaL_register (lua_state:Byte Ptr, libname$z, l:lua_Reg Ptr)            ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_typerror">Lua Reference Manual</a>
+end rem
+  Function luaL_typerror:Int (lua_state:Byte Ptr, narg:Int, tname$z)               ' no ~0 expected
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_unref">Lua Reference Manual</a>
+end rem
+  Function luaL_unref (lua_state:Byte Ptr, t:Int, ref:Int)
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_where">Lua Reference Manual</a>
+end rem
+  Function luaL_where (lua_state:Byte Ptr, lvl:Int)
+End Extern
+
+' ******************************************************************************
+' *                                                                            *
+' *                       The Auxiliary Library (Macros)                       *
+' *                                                                            *
+' ******************************************************************************
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_addchar">Lua Reference Manual</a>
+end rem
+  Function luaL_addchar (B:Byte Ptr, c:String)
+'    luaL_addstring(B,Left$(c,1))     ' not really efficient, just to be complete
+	luaL_addstring( B,c[..Min(c.Length,1)] )	'functionally equivalent?
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_argcheck">Lua Reference Manual</a>
+end rem
+  Function luaL_argcheck (lua_state:Byte Ptr, cond:Int, narg:Int, extramsg:String)
+    If (Not cond) Then luaL_argerror(lua_state, narg, extramsg)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checkint">Lua Reference Manual</a>
+end rem
+  Function luaL_checkint:Int (lua_state:Byte Ptr, narg:Int)
+    Return Int(luaL_checkinteger(lua_state, narg))   ' Lua itself does the same!
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checklong">Lua Reference Manual</a>
+end rem
+  Function luaL_checklong:Long (lua_state:Byte Ptr, narg:Int)
+    Return luaL_checkinteger(lua_state, narg)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_checkstring">Lua Reference Manual</a>
+end rem
+  Function luaL_checkstring:String (lua_state:Byte Ptr, narg:Int)
+?Not ptr64
+    Local Size:Int
+    Local StringPtr:Byte Ptr = luaL_checklstring(lua_state, narg, Varptr Size)
+?ptr64
+    Local Size:Long
+    Local StringPtr:Byte Ptr = luaL_checklstring(lua_state, narg, Varptr Size)
+?
+    If (StringPtr = Null) Then
+      Return Null
+    Else
+      Return String.fromBytes(StringPtr,Size)
+    End If
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_dofile">Lua Reference Manual</a>
+end rem
+  Function luaL_dofile:Int (lua_state:Byte Ptr, filename:String)
+    If (luaL_loadfile(lua_state,filename) <> 0) Then
+      Return 1
+    Else
+      Return lua_pcall(lua_state, 0, LUA_MULTRET, 0)
+    End If
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_dostring">Lua Reference Manual</a>
+end rem
+  Function luaL_dostring:Int (lua_state:Byte Ptr, str:String)
+    If (luaL_loadstring(lua_state,str) <> 0) Then
+      Return 1
+    Else
+      Return lua_pcall(lua_state, 0, LUA_MULTRET, 0)
+    End If
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_getmetatable">Lua Reference Manual</a>
+end rem
+  Function luaL_getmetatable (lua_state:Byte Ptr, tname:String)
+    lua_getfield(lua_state, LUA_REGISTRYINDEX, tname)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_optint">Lua Reference Manual</a>
+end rem
+  Function luaL_optint:Int (lua_state:Byte Ptr, narg:Int, d:Int)
+    Return luaL_optinteger(lua_state, narg, d)
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_optlong">Lua Reference Manual</a>
+end rem
+  Function luaL_optlong:Long (lua_state:Byte Ptr, narg:Int, d:Long)
+    If ((Abs(narg) > lua_gettop(lua_state)) Or lua_isnil(lua_state,narg)) Then
+      Return d
+    Else
+      Return luaL_checklong(lua_state,narg)
+    End If
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_optstring">Lua Reference Manual</a>
+end rem
+  Function luaL_optstring:String (lua_state:Byte Ptr, narg:Int, d:String)
+?Not ptr64
+    Local Size:Int
+    Local StringPtr:Byte Ptr = luaL_optlstring(lua_state, narg, d, Varptr Size)
+?ptr64
+    Local Size:Long
+    Local StringPtr:Byte Ptr = luaL_optlstring(lua_state, narg, d, Varptr Size)
+?
+    If (StringPtr = Null) Then
+      Return Null
+    Else
+      Return String.fromBytes(StringPtr,Size)
+    End If
+  End Function
+
+Rem
+bbdoc: see <a href="../../../../mod/pub.mod/lua.mod/lua-5.1.4/doc/manual.html#luaL_typename">Lua Reference Manual</a>
+end rem
+  Function luaL_typename:String (lua_state:Byte Ptr, idx:Int)
+    Return lua_typename(lua_state, lua_type (lua_state,idx))
+  End Function
+
+
+' ******************************************************************************
+' *                                                                            *
+' *     compatibility extensions (not to break existing axe.lua programs)      *
+' *                                                                            *
+' ******************************************************************************
+
+Extern
+  Function luaopen_base:Int    (lua_state:Byte Ptr)
+  Function luaopen_debug:Int   (lua_state:Byte Ptr)
+  Function luaopen_io:Int      (lua_state:Byte Ptr)
+  Function luaopen_math:Int    (lua_state:Byte Ptr)
+  Function luaopen_os:Int      (lua_state:Byte Ptr)
+  Function luaopen_package:Int (lua_state:Byte Ptr)
+  Function luaopen_string:Int  (lua_state:Byte Ptr)
+  Function luaopen_table:Int (lua_state:Byte Ptr)
+  Function luaopen_jit:Int		(lua_state:Byte Ptr)
+End Extern
+
+  Function lua_dobuffer:Int (lua_state:Byte Ptr, buff:String, sz:Int, name:String)
+    If (luaL_loadbuffer(lua_state,buff,sz,name) <> 0) Then
+      Return 1
+    Else
+      Return lua_pcall(lua_state, 0, LUA_MULTRET, 0)
+    End If
+  End Function
+
+  Function lua_dofile:Int (lua_state:Byte Ptr, filename:String)
+    Return luaL_dofile(lua_state,filename)
+  End Function
+
+  Function lua_dostring:Int (lua_state:Byte Ptr, str:String)
+    Return luaL_dostring(lua_state,str)
+  End Function
+
+  Function lua_strlen:Int (lua_state:Byte Ptr, index:Int)
+    Return lua_objlen(lua_state,index)
+  End Function
+	
+
+Extern
+	Function bmx_luajit_lua_Debug_new:Byte Ptr()
+	Function bmx_luajit_lua_Debug_free(handle:Byte Ptr)
+	
+	Function bmx_luajit_lua_Debug_event:Int(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_name:String(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_namewhat:String(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_what:String(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_source:String(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_currentline:Int(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_nups:Int(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_linedefined:Int(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_lastlinedefined:Int(handle:Byte Ptr)
+	Function bmx_luajit_lua_Debug_short_src:Byte Ptr(handle:Byte Ptr)
+End Extern

+ 56 - 0
luajit.mod/luajit/COPYRIGHT

@@ -0,0 +1,56 @@
+===============================================================================
+LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
+
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+
+===============================================================================
+[ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ]
+
+Copyright (C) 1994-2012 Lua.org, PUC-Rio.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+[ LuaJIT includes code from dlmalloc, which has this license statement: ]
+
+This is a version (aka dlmalloc) of malloc/free/realloc written by
+Doug Lea and released to the public domain, as explained at
+http://creativecommons.org/licenses/publicdomain
+
+===============================================================================

+ 151 - 0
luajit.mod/luajit/Makefile

@@ -0,0 +1,151 @@
+##############################################################################
+# LuaJIT top level Makefile for installation. Requires GNU Make.
+#
+# Please read doc/install.html before changing any variables!
+#
+# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
+# Note: src/Makefile has many more configurable options.
+#
+# ##### This Makefile is NOT useful for Windows! #####
+# For MSVC, please follow the instructions given in src/msvcbuild.bat.
+# For MinGW and Cygwin, cd to src and run make with the Makefile there.
+#
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+##############################################################################
+
+MAJVER=  2
+MINVER=  0
+RELVER=  4
+VERSION= $(MAJVER).$(MINVER).$(RELVER)
+ABIVER=  5.1
+
+##############################################################################
+#
+# Change the installation path as needed. This automatically adjusts
+# the paths in src/luaconf.h, too. Note: PREFIX must be an absolute path!
+#
+export PREFIX= /usr/local
+export MULTILIB= lib
+##############################################################################
+
+DPREFIX= $(DESTDIR)$(PREFIX)
+INSTALL_BIN=   $(DPREFIX)/bin
+INSTALL_LIB=   $(DPREFIX)/$(MULTILIB)
+INSTALL_SHARE= $(DPREFIX)/share
+INSTALL_INC=   $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER)
+
+INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION)
+INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit
+INSTALL_LMODD= $(INSTALL_SHARE)/lua
+INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
+INSTALL_CMODD= $(INSTALL_LIB)/lua
+INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER)
+INSTALL_MAN= $(INSTALL_SHARE)/man/man1
+INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig
+
+INSTALL_TNAME= luajit-$(VERSION)
+INSTALL_TSYMNAME= luajit
+INSTALL_ANAME= libluajit-$(ABIVER).a
+INSTALL_SONAME= libluajit-$(ABIVER).so.$(MAJVER).$(MINVER).$(RELVER)
+INSTALL_SOSHORT= libluajit-$(ABIVER).so
+INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib
+INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib
+INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib
+INSTALL_PCNAME= luajit.pc
+
+INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME)
+INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME)
+INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT)
+INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT)
+INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME)
+INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME)
+INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
+
+INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
+  $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
+UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \
+  $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
+
+RM= rm -f
+MKDIR= mkdir -p
+RMDIR= rmdir 2>/dev/null
+SYMLINK= ln -sf
+INSTALL_X= install -m 0755
+INSTALL_F= install -m 0644
+UNINSTALL= $(RM)
+LDCONFIG= ldconfig -n
+SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
+            -e "s|^multilib=.*|multilib=$(MULTILIB)|"
+
+FILE_T= luajit
+FILE_A= libluajit.a
+FILE_SO= libluajit.so
+FILE_MAN= luajit.1
+FILE_PC= luajit.pc
+FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
+FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \
+	      dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua
+
+ifeq (,$(findstring Windows,$(OS)))
+  ifeq (Darwin,$(shell uname -s))
+    INSTALL_SONAME= $(INSTALL_DYLIBNAME)
+    INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_DYLIBSHORT1)
+    INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_DYLIBSHORT2)
+    LDCONFIG= :
+  endif
+endif
+
+##############################################################################
+
+INSTALL_DEP= src/luajit
+
+default all $(INSTALL_DEP):
+	@echo "==== Building LuaJIT $(VERSION) ===="
+	$(MAKE) -C src
+	@echo "==== Successfully built LuaJIT $(VERSION) ===="
+
+install: $(INSTALL_DEP)
+	@echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ===="
+	$(MKDIR) $(INSTALL_DIRS)
+	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
+	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
+	$(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+	cd src && test -f $(FILE_SO) && \
+	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
+	  $(LDCONFIG) $(INSTALL_LIB) && \
+	  $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \
+	  $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || :
+	cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN)
+	cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \
+	  $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \
+	  $(RM) $(FILE_PC).tmp
+	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
+	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
+	$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
+	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+
+uninstall:
+	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
+	$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	for file in $(FILES_JITLIB); do \
+	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
+	  done
+	for file in $(FILES_INC); do \
+	  $(UNINSTALL) $(INSTALL_INC)/$$file; \
+	  done
+	$(LDCONFIG) $(INSTALL_LIB)
+	$(RMDIR) $(UNINSTALL_DIRS) || :
+	@echo "==== Successfully uninstalled LuaJIT $(VERSION) from $(PREFIX) ===="
+
+##############################################################################
+
+amalg:
+	@echo "Building LuaJIT $(VERSION)"
+	$(MAKE) -C src amalg
+
+clean:
+	$(MAKE) -C src clean
+
+.PHONY: all install amalg clean
+
+##############################################################################

+ 16 - 0
luajit.mod/luajit/README

@@ -0,0 +1,16 @@
+README for LuaJIT 2.0.4
+-----------------------
+
+LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
+
+Project Homepage: http://luajit.org/
+
+LuaJIT is Copyright (C) 2005-2015 Mike Pall.
+LuaJIT is free software, released under the MIT license.
+See full Copyright Notice in the COPYRIGHT file or in luajit.h.
+
+Documentation for LuaJIT is available in HTML format.
+Please point your favorite browser to:
+
+ doc/luajit.html
+

+ 166 - 0
luajit.mod/luajit/doc/bluequad-print.css

@@ -0,0 +1,166 @@
+/* Copyright (C) 2004-2015 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+ */
+body {
+  font-family: serif;
+  font-size: 11pt;
+  margin: 0 3em;
+  padding: 0;
+  border: none;
+}
+a:link, a:visited, a:hover, a:active {
+  text-decoration: none;
+  background: transparent;
+  color: #0000ff;
+}
+h1, h2, h3 {
+  font-family: sans-serif;
+  font-weight: bold;
+  text-align: left;
+  margin: 0.5em 0;
+  padding: 0;
+}
+h1 {
+  font-size: 200%;
+}
+h2 {
+  font-size: 150%;
+}
+h3 {
+  font-size: 125%;
+}
+p {
+  margin: 0 0 0.5em 0;
+  padding: 0;
+}
+ul, ol {
+  margin: 0.5em 0;
+  padding: 0 0 0 2em;
+}
+ul {
+  list-style: outside square;
+}
+ol {
+  list-style: outside decimal;
+}
+li {
+  margin: 0;
+  padding: 0;
+}
+dl {
+  margin: 1em 0;
+  padding: 1em;
+  border: 1px solid black;
+}
+dt {
+  font-weight: bold;
+  margin: 0;
+  padding: 0;
+}
+dt sup {
+  float: right;
+  margin-left: 1em;
+}
+dd {
+  margin: 0.5em 0 0 2em;
+  padding: 0;
+}
+table {
+  table-layout: fixed;
+  width: 100%;
+  margin: 1em 0;
+  padding: 0;
+  border: 1px solid black;
+  border-spacing: 0;
+  border-collapse: collapse;
+}
+tr {
+  margin: 0;
+  padding: 0;
+  border: none;
+}
+td {
+  text-align: left;
+  margin: 0;
+  padding: 0.2em 0.5em;
+  border-top: 1px solid black;
+  border-bottom: 1px solid black;
+}
+tr.separate td {
+  border-top: double;
+}
+tt, pre, code, kbd, samp {
+  font-family: monospace;
+  font-size: 75%;
+}
+kbd {
+  font-weight: bolder;
+}
+blockquote, pre {
+  margin: 1em 2em;
+  padding: 0;
+}
+img {
+  border: none;
+  vertical-align: baseline;
+  margin: 0;
+  padding: 0;
+}
+img.left {
+  float: left;
+  margin: 0.5em 1em 0.5em 0;
+}
+img.right {
+  float: right;
+  margin: 0.5em 0 0.5em 1em;
+}
+.flush {
+  clear: both;
+  visibility: hidden;
+}
+.hide, .noprint, #nav {
+  display: none !important;
+}
+.pagebreak {
+  page-break-before: always;
+}
+#site {
+  text-align: right;
+  font-family: sans-serif;
+  font-weight: bold;
+  margin: 0 1em;
+  border-bottom: 1pt solid black;
+}
+#site a {
+  font-size: 1.2em;
+}
+#site a:link, #site a:visited {
+  text-decoration: none;
+  font-weight: bold;
+  background: transparent;
+  color: #ffffff;
+}
+#logo {
+  color: #ff8000;
+}
+#head {
+  clear: both;
+  margin: 0 1em;
+}
+#main {
+  line-height: 1.3;
+  text-align: justify;
+  margin: 1em;
+}
+#foot {
+  clear: both;
+  font-size: 80%;
+  text-align: center;
+  margin: 0 1.25em;
+  padding: 0.5em 0 0 0;
+  border-top: 1pt solid black;
+  page-break-before: avoid;
+  page-break-after: avoid;
+}

+ 325 - 0
luajit.mod/luajit/doc/bluequad.css

@@ -0,0 +1,325 @@
+/* Copyright (C) 2004-2015 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+ */
+/* colorscheme:
+ *
+ * site  |  head   #4162bf/white   | #6078bf/#e6ecff
+ * ------+------   ----------------+-------------------
+ * nav   |  main   #bfcfff         | #e6ecff/black
+ *
+ * nav:  hiback   loback     #c5d5ff #b9c9f9
+ *       hiborder loborder   #e6ecff #97a7d7
+ *       link     hover      #2142bf #ff0000
+ *
+ * link: link visited hover  #2142bf #8122bf #ff0000
+ *
+ * main: boxback  boxborder  #f0f4ff #bfcfff
+ */
+body {
+  font-family: Verdana, Arial, Helvetica, sans-serif;
+  font-size: 10pt;
+  margin: 0;
+  padding: 0;
+  border: none;
+  background: #e0e0e0;
+  color: #000000;
+}
+a:link {
+  text-decoration: none;
+  background: transparent;
+  color: #2142bf;
+}
+a:visited {
+  text-decoration: none;
+  background: transparent;
+  color: #8122bf;
+}
+a:hover, a:active {
+  text-decoration: underline;
+  background: transparent;
+  color: #ff0000;
+}
+h1, h2, h3 {
+  font-weight: bold;
+  text-align: left;
+  margin: 0.5em 0;
+  padding: 0;
+  background: transparent;
+}
+h1 {
+  font-size: 200%;
+  line-height: 3em; /* really 6em relative to body, match #site span */
+  margin: 0;
+}
+h2 {
+  font-size: 150%;
+  color: #606060;
+}
+h3 {
+  font-size: 125%;
+  color: #404040;
+}
+p {
+  max-width: 600px;
+  margin: 0 0 0.5em 0;
+  padding: 0;
+}
+b {
+  color: #404040;
+}
+ul, ol {
+  max-width: 600px;
+  margin: 0.5em 0;
+  padding: 0 0 0 2em;
+}
+ul {
+  list-style: outside square;
+}
+ol {
+  list-style: outside decimal;
+}
+li {
+  margin: 0;
+  padding: 0;
+}
+dl {
+  max-width: 600px;
+  margin: 1em 0;
+  padding: 1em;
+  border: 1px solid #bfcfff;
+  background: #f0f4ff;
+}
+dt {
+  font-weight: bold;
+  margin: 0;
+  padding: 0;
+}
+dt sup {
+  float: right;
+  margin-left: 1em;
+  color: #808080;
+}
+dt a:visited {
+  text-decoration: none;
+  color: #2142bf;
+}
+dt a:hover, dt a:active {
+  text-decoration: none;
+  color: #ff0000;
+}
+dd {
+  margin: 0.5em 0 0 2em;
+  padding: 0;
+}
+div.tablewrap { /* for IE *sigh* */
+  max-width: 600px;
+}
+table {
+  table-layout: fixed;
+  border-spacing: 0;
+  border-collapse: collapse;
+  max-width: 600px;
+  width: 100%;
+  margin: 1em 0;
+  padding: 0;
+  border: 1px solid #bfcfff;
+}
+tr {
+  margin: 0;
+  padding: 0;
+  border: none;
+}
+tr.odd {
+  background: #f0f4ff;
+}
+tr.separate td {
+  border-top: 1px solid #bfcfff;
+}
+td {
+  text-align: left;
+  margin: 0;
+  padding: 0.2em 0.5em;
+  border: none;
+}
+tt, code, kbd, samp {
+  font-family: Courier New, Courier, monospace;
+  line-height: 1.2;
+  font-size: 110%;
+}
+kbd {
+  font-weight: bolder;
+}
+blockquote, pre {
+  max-width: 600px;
+  margin: 1em 2em;
+  padding: 0;
+}
+pre {
+  line-height: 1.1;
+}
+pre.code {
+  line-height: 1.4;
+  margin: 0.5em 0 1em 0.5em;
+  padding: 0.5em 1em;
+  border: 1px solid #bfcfff;
+  background: #f0f4ff;
+}
+pre.mark {
+  padding-left: 2em;
+}
+span.codemark {
+  position:absolute;
+  left: 16em;
+  color: #4040c0;
+}
+span.mark {
+  color: #4040c0;
+  font-family: Courier New, Courier, monospace;
+  line-height: 1.1;
+}
+img {
+  border: none;
+  vertical-align: baseline;
+  margin: 0;
+  padding: 0;
+}
+img.left {
+  float: left;
+  margin: 0.5em 1em 0.5em 0;
+}
+img.right {
+  float: right;
+  margin: 0.5em 0 0.5em 1em;
+}
+.indent {
+  padding-left: 1em;
+}
+.flush {
+  clear: both;
+  visibility: hidden;
+}
+.hide, .noscreen {
+  display: none !important;
+}
+.ext {
+  color: #ff8000;
+}
+.new {
+  font-size: 6pt;
+  vertical-align: middle;
+  background: #ff8000;
+  color: #ffffff;
+}
+#site {
+  clear: both;
+  float: left;
+  width: 13em;
+  text-align: center;
+  font-weight: bold;
+  margin: 0;
+  padding: 0;
+  background: transparent;
+  color: #ffffff;
+}
+#site a {
+  font-size: 200%;
+}
+#site a:link, #site a:visited {
+  text-decoration: none;
+  font-weight: bold;
+  background: transparent;
+  color: #ffffff;
+}
+#site span {
+  line-height: 3em; /* really 6em relative to body, match h1 */
+}
+#logo {
+  color: #ffb380;
+}
+#head {
+  margin: 0;
+  padding: 0 0 0 2em;
+  border-left: solid 13em #4162bf;
+  border-right: solid 3em #6078bf;
+  background: #6078bf;
+  color: #e6ecff;
+}
+#nav {
+  clear: both;
+  float: left;
+  overflow: hidden;
+  text-align: left;
+  line-height: 1.5;
+  width: 13em;
+  padding-top: 1em;
+  background: transparent;
+}
+#nav ul {
+  list-style: none outside;
+  margin: 0;
+  padding: 0;
+}
+#nav li {
+  margin: 0;
+  padding: 0;
+}
+#nav a {
+  display: block;
+  text-decoration: none;
+  font-weight: bold;
+  margin: 0;
+  padding: 2px 1em;
+  border-top: 1px solid transparent;
+  border-bottom: 1px solid transparent;
+  background: transparent;
+  color: #2142bf;
+}
+#nav a:hover, #nav a:active {
+  text-decoration: none;
+  border-top: 1px solid #97a7d7;
+  border-bottom: 1px solid #e6ecff;
+  background: #b9c9f9;
+  color: #ff0000;
+}
+#nav a.current, #nav a.current:hover, #nav a.current:active {
+  border-top: 1px solid #e6ecff;
+  border-bottom: 1px solid #97a7d7;
+  background: #c5d5ff;
+  color: #2142bf;
+}
+#nav ul ul a {
+  padding: 0 1em 0 1.7em;
+}
+#nav ul ul ul a {
+  padding: 0 0.5em 0 2.4em;
+}
+#main {
+  line-height: 1.5;
+  text-align: left;
+  margin: 0;
+  padding: 1em 2em;
+  border-left: solid 13em #bfcfff;
+  border-right: solid 3em #e6ecff;
+  background: #e6ecff;
+}
+#foot {
+  clear: both;
+  font-size: 80%;
+  text-align: center;
+  margin: 0;
+  padding: 0.5em;
+  background: #6078bf;
+  color: #ffffff;
+}
+#foot a:link, #foot a:visited {
+  text-decoration: underline;
+  background: transparent;
+  color: #ffffff;
+}
+#foot a:hover, #foot a:active {
+  text-decoration: underline;
+  background: transparent;
+  color: #bfcfff;
+}

+ 978 - 0
luajit.mod/luajit/doc/changes.html

@@ -0,0 +1,978 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>LuaJIT Change History</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>LuaJIT Change History</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a class="current" href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+This is a list of changes between the released versions of LuaJIT.<br>
+The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.4</strong>.<br>
+</p>
+<p>
+Please check the
+<a href="http://luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a>
+to see whether newer versions are available.
+</p>
+
+<div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 &mdash; 2015-05-14</h2>
+<ul>
+<li>Fix stack check in narrowing optimization.</li>
+<li>Fix Lua/C API typecheck error for special indexes.</li>
+<li>Fix string to number conversion.</li>
+<li>Fix lexer error for chunks without tokens.</li>
+<li>Don't compile <tt>IR_RETF</tt> after <tt>CALLT</tt> to ff with-side effects.</li>
+<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization in Lua parser.</li>
+<li>Fix corner case in string to number conversion.</li>
+<li>Gracefully handle <tt>lua_error()</tt> for a suspended coroutine.</li>
+<li>Avoid error messages when building with Clang.</li>
+<li>Fix snapshot #0 handling for traces with a stack check on entry.</li>
+<li>Fix fused constant loads under high register pressure.</li>
+<li>Invalidate backpropagation cache after DCE.</li>
+<li>Fix ABC elimination.</li>
+<li>Fix debug info for main chunk of stripped bytecode.</li>
+<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li>
+<li>Fix FOLD rule for <tt>STRREF</tt> of <tt>SNEW</tt>.</li>
+<li>Fix frame traversal while searching for error function.</li>
+<li>Prevent GC estimate miscalculation due to buffer growth.</li>
+<li>Prevent adding side traces for stack checks.</li>
+<li>Fix top slot calculation for snapshots with continuations.</li>
+<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li>
+<li>Add PS Vita port.</li>
+<li>Fix compatibility issues with Illumos.</li>
+<li>Fix DragonFly build (unsupported).</li>
+<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li>
+<li>x86: Fix argument checks for <tt>ipairs()</tt> iterator.</li>
+<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX Clang.</li>
+<li>x86: Fix code generation for unused result of <tt>math.random()</tt>.</li>
+<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and <tt>LUAJIT_USE_VALGRIND</tt>.</li>
+<li>x86/x64: Fix argument check for bit shifts.</li>
+<li>x86/x64: Fix code generation for fused test/arith ops.</li>
+<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li>
+<li>PPC: Fix red zone overflow in machine code generation.</li>
+<li>PPC: Don't use <tt>mcrxr</tt> on PPE.</li>
+<li>Various archs: Fix excess stack growth in interpreter.</li>
+<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV num.u32</tt>.</li>
+<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li>
+<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li>
+<li>FFI: Fix initialization of unions of subtypes.</li>
+<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li>
+<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt> metamethod resolution for ctypes.</li>
+<li>FFI: Fix compilation of reference field access.</li>
+<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li>
+<li>FFI: Fix recording of indexing a struct pointer ctype object itself.</li>
+<li>FFI: Allow non-scalar cdata to be compared for equality by address.</li>
+<li>FFI: Fix pseudo type conversions for type punning.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 &mdash; 2014-03-12</h2>
+<ul>
+<li>Add PS4 port.</li>
+<li>Add support for multilib distro builds.</li>
+<li>Fix OSX build.</li>
+<li>Fix MinGW build.</li>
+<li>Fix Xbox 360 build.</li>
+<li>Improve ULOAD forwarding for open upvalues.</li>
+<li>Fix GC steps threshold handling when called by JIT-compiled code.</li>
+<li>Fix argument checks for <tt>math.deg()</tt> and <tt>math.rad()</tt>.</li>
+<li>Fix <tt>jit.flush(func|true)</tt>.</li>
+<li>Respect <tt>jit.off(func)</tt> when returning to a function, too.</li>
+<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li>
+<li>Fix line number for relocated bytecode after closure fixup</li>
+<li>Fix frame traversal for backtraces.</li>
+<li>Fix ABC elimination.</li>
+<li>Fix handling of redundant PHIs.</li>
+<li>Fix snapshot restore for exit to function header.</li>
+<li>Fix type punning alias analysis for constified pointers</li>
+<li>Fix call unroll checks in the presence of metamethod frames.</li>
+<li>Fix initial maxslot for down-recursive traces.</li>
+<li>Prevent BASE register coalescing if parent uses <tt>IR_RETF</tt>.</li>
+<li>Don't purge modified function from stack slots in <tt>BC_RET</tt>.</li>
+<li>Fix recording of <tt>BC_VARG</tt>.</li>
+<li>Don't access dangling reference to reallocated IR.</li>
+<li>Fix frame depth display for bytecode dump in <tt>-jdump</tt>.</li>
+<li>ARM: Fix register allocation when rematerializing FPRs.</li>
+<li>x64: Fix store to upvalue for lightuserdata values.</li>
+<li>FFI: Add missing GC steps for callback argument conversions.</li>
+<li>FFI: Properly unload loaded DLLs.</li>
+<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li>
+<li>FFI/x64: Fix passing of vector arguments to calls.</li>
+<li>FFI: Rehash finalizer table after GC cycle, if needed.</li>
+<li>FFI: Fix <tt>cts-&gt;L</tt> for cdata unsinking in snapshot restore.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 &mdash; 2013-06-03</h2>
+<ul>
+<li>Fix memory access check for fast string interning.</li>
+<li>Fix MSVC intrinsics for older versions.</li>
+<li>Add missing GC steps for <tt>io.*</tt> functions.</li>
+<li>Fix spurious red zone overflows in machine code generation.</li>
+<li>Fix jump-range constrained mcode allocation.</li>
+<li>Inhibit DSE for implicit loads via calls.</li>
+<li>Fix builtin string to number conversion for overflow digits.</li>
+<li>Fix optional argument handling while recording builtins.</li>
+<li>Fix optional argument handling in <tt>table.concat()</tt>.</li>
+<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li>
+<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt> FOLD rule.</li>
+<li>Fix compatibility issues with Illumos.</li>
+<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li>
+<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li>
+<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler code.</li>
+<li>FFI: Fix snapshot substitution in SPLIT pass.</li>
+<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li>
+<li>FFI: Fix tailcall in lowest frame to C&nbsp;function with bool result.</li>
+<li>FFI: Ignore <tt>long</tt> type specifier in <tt>ffi.istype()</tt>.</li>
+<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).</li>
+<li>FFI: Fix calling conventions for ARM hard-float EABI (nested structs).</li>
+<li>FFI: Improve error messages for arithmetic and comparison operators.</li>
+<li>FFI: Insert no-op type conversion for pointer to integer cast.</li>
+<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li>
+<li>FFI: Must sink <tt>XBAR</tt> together with <tt>XSTORE</tt>s.</li>
+<li>FFI: Preserve intermediate string for <tt>const&nbsp;char&nbsp;*</tt> conversion.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 &mdash; 2013-02-19</h2>
+<ul>
+<li>Don't clear frame for out-of-memory error.</li>
+<li>Leave hook when resume catches error thrown from hook.</li>
+<li>Add missing GC steps for template table creation.</li>
+<li>Fix discharge order of comparisons in Lua parser.</li>
+<li>Improve buffer handling for <tt>io.read()</tt>.</li>
+<li>OSX: Add support for Mach-O object files to <tt>-b</tt> option.</li>
+<li>Fix PS3 port.</li>
+<li>Fix/enable Xbox 360 port.</li>
+<li>x86/x64: Always mark ref for shift count as non-weak.</li>
+<li>x64: Don't fuse implicitly 32-to-64 extended operands.</li>
+<li>ARM: Fix armhf call argument handling.</li>
+<li>ARM: Fix code generation for integer math.min/math.max.</li>
+<li>PPC/e500: Fix <tt>lj_vm_floor()</tt> for Inf/NaN.</li>
+<li>FFI: Change priority of table initializer variants for structs.</li>
+<li>FFI: Fix code generation for bool call result check on x86/x64.</li>
+<li>FFI: Load FFI library on-demand for bytecode with cdata literals.</li>
+<li>FFI: Fix handling of qualified transparent structs/unions.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0">LuaJIT 2.0.0 &mdash; 2012-11-08</h2>
+<ul>
+<li>Correctness and completeness:
+<ul>
+  <li>Fix Android/x86 build.</li>
+  <li>Fix recording of equality comparisons with <tt>__eq</tt> metamethods.</li>
+  <li>Fix detection of immutable upvalues.</li>
+  <li>Replace error with PANIC for callbacks from JIT-compiled code.</li>
+  <li>Fix builtin string to number conversion for <tt>INT_MIN</tt>.</li>
+  <li>Don't create unneeded array part for template tables.</li>
+  <li>Fix <tt>CONV.num.int</tt> sinking.</li>
+  <li>Don't propagate implicitly widened number to index metamethods.</li>
+  <li>ARM: Fix ordered comparisons of number vs. non-number.</li>
+  <li>FFI: Fix code generation for replay of sunk float fields.</li>
+  <li>FFI: Fix signedness of bool.</li>
+  <li>FFI: Fix recording of bool call result check on x86/x64.</li>
+  <li>FFI: Fix stack-adjustment for <tt>__thiscall</tt> callbacks.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta11">LuaJIT 2.0.0-beta11 &mdash; 2012-10-16</h2>
+<ul>
+<li>New features:
+<ul>
+  <li>Use ARM VFP instructions, if available (build-time detection).</li>
+  <li>Add support for ARM hard-float EABI (<tt>armhf</tt>).</li>
+  <li>Add PS3 port.</li>
+  <li>Add many features from Lua&nbsp;5.2, e.g. <tt>goto</tt>/labels.
+  Refer to <a href="extensions.html#lua52">this list</a>.</li>
+  <li>FFI: Add parameterized C types.</li>
+  <li>FFI: Add support for copy constructors.</li>
+  <li>FFI: Equality comparisons never raise an error (treat as unequal instead).</li>
+  <li>FFI: Box all accessed or returned enums.</li>
+  <li>FFI: Check for <tt>__new</tt> metamethod when calling a constructor.</li>
+  <li>FFI: Handle <tt>__pairs</tt>/<tt>__ipairs</tt> metamethods for cdata objects.</li>
+  <li>FFI: Convert <tt>io.*</tt> file handle to <tt>FILE *</tt> pointer (but as a <tt>void *</tt>).</li>
+  <li>FFI: Detect and support type punning through unions.</li>
+  <li>FFI: Improve various error messages.</li>
+</ul></li>
+<li>Build-system reorganization:
+<ul>
+  <li>Reorganize directory layout:<br>
+  <tt>lib/*</tt> &rarr; <tt>src/jit/*</tt><br>
+  <tt>src/buildvm_*.dasc</tt> &rarr; <tt>src/vm_*.dasc</tt><br>
+  <tt>src/buildvm_*.h</tt> &rarr; removed<br>
+  <tt>src/buildvm*</tt> &rarr; <tt>src/host/*</tt></li>
+  <li>Add minified Lua interpreter plus Lua BitOp (<tt>minilua</tt>) to run DynASM.</li>
+  <li>Change DynASM bit operations to use Lua BitOp</li>
+  <li>Translate only <tt>vm_*.dasc</tt> for detected target architecture.</li>
+  <li>Improve target detection for <tt>msvcbuild.bat</tt>.</li>
+  <li>Fix build issues on Cygwin and MinGW with optional MSys.</li>
+  <li>Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI mismatch.</li>
+  <li>Remove some library functions for no-JIT/no-FFI builds.</li>
+  <li>Add uninstall target to top-level Makefile.</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+  <li>Preserve snapshot #0 PC for all traces.</li>
+  <li>Fix argument checks for <tt>coroutine.create()</tt>.</li>
+  <li>Command line prints version and JIT status to <tt>stdout</tt>, not <tt>stderr</tt>.</li>
+  <li>Fix userdata <tt>__gc</tt> separations at Lua state close.</li>
+  <li>Fix <tt>TDUP</tt> to <tt>HLOAD</tt> forwarding for <tt>LJ_DUALNUM</tt> builds.</li>
+  <li>Fix buffer check in bytecode writer.</li>
+  <li>Make <tt>os.date()</tt> thread-safe.</li>
+  <li>Add missing declarations for MSVC intrinsics.</li>
+  <li>Fix dispatch table modifications for return hooks.</li>
+  <li>Workaround for MSVC conversion bug (<tt>double</tt> &rarr; <tt>uint32_t</tt> &rarr; <tt>int32_t</tt>).</li>
+  <li>Fix FOLD rule <tt>(i-j)-i => 0-j</tt>.</li>
+  <li>Never use DWARF unwinder on Windows.</li>
+  <li>Fix shrinking of direct mapped blocks in builtin allocator.</li>
+  <li>Limit recursion depth in <tt>string.match()</tt> et al.</li>
+  <li>Fix late despecialization of <tt>ITERN</tt> after loop has been entered.</li>
+  <li>Fix <tt>'f'</tt> and <tt>'L'</tt> options for <tt>debug.getinfo()</tt> and <tt>lua_getinfo()</tt>.</li>
+  <li>Fix <tt>package.searchpath()</tt>.</li>
+  <li>OSX: Change dylib names to be consistent with other platforms.</li>
+  <li>Android: Workaround for broken <tt>sprintf("%g",&nbsp;-0.0)</tt>.</li>
+  <li>x86: Remove support for ancient CPUs without <tt>CMOV</tt> (before Pentium Pro).</li>
+  <li>x86: Fix register allocation for calls returning register pair.</li>
+  <li>x86/x64: Fix fusion of unsigned byte comparisons with swapped operands.</li>
+  <li>ARM: Fix <tt>tonumber()</tt> argument check.</li>
+  <li>ARM: Fix modulo operator and <tt>math.floor()</tt>/<tt>math.ceil()</tt> for <tt>inf</tt>/<tt>nan</tt>.</li>
+  <li>ARM: Invoke SPLIT pass for leftover <tt>IR_TOBIT</tt>.</li>
+  <li>ARM: Fix BASE register coalescing.</li>
+  <li>PPC: Fix interpreter state setup in callbacks.</li>
+  <li>PPC: Fix <tt>string.sub()</tt> range check.</li>
+  <li>MIPS: Support generation of MIPS/MIPSEL bytecode object files.</li>
+  <li>MIPS: Fix calls to <tt>floor()</tt>/<tt>ceil()</tt><tt>/trunc()</tt>.</li>
+  <li>ARM/PPC: Detect more target architecture variants.</li>
+  <li>ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp. <tt>tostring()</tt>.</li>
+  <li>ARM/PPC/MIPS: Fix rematerialization of FP constants.</li>
+  <li>FFI: Don't call <tt>FreeLibrary()</tt> on our own EXE/DLL.</li>
+  <li>FFI: Resolve metamethods for constructors, too.</li>
+  <li>FFI: Properly disable callbacks on iOS (would require executable memory).</li>
+  <li>FFI: Fix cdecl string parsing during recording.</li>
+  <li>FFI: Show address pointed to for <tt>tostring(ref)</tt>, too.</li>
+  <li>FFI: Fix alignment of C call argument/return structure.</li>
+  <li>FFI: Initialize all fields of standard types.</li>
+  <li>FFI: Fix callback handling when new C&nbsp;types are declared in callback.</li>
+  <li>FFI: Fix recording of constructors for pointers.</li>
+  <li>FFI: Always resolve metamethods for pointers to structs.</li>
+  <li>FFI: Correctly propagate alignment when interning nested types.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+  <li>Add allocation sinking and store sinking optimization.</li>
+  <li>Constify immutable upvalues.</li>
+  <li>Add builtin string to integer or FP number conversion. Improves cross-platform consistency and correctness.</li>
+  <li>Create string hash slots in template tables for non-const values, too. Avoids later table resizes.</li>
+  <li>Eliminate <tt>HREFK</tt> guard for template table references.</li>
+  <li>Add various new FOLD rules.</li>
+  <li>Don't use stack unwinding for <tt>lua_yield()</tt> (slow on x64).</li>
+  <li>ARM, PPC, MIPS: Improve <tt>XLOAD</tt> operand fusion and register hinting.</li>
+  <li>PPC, MIPS: Compile <tt>math.sqrt()</tt> to sqrt instruction, if available.</li>
+  <li>FFI: Fold <tt>KPTR</tt> + constant offset in SPLIT pass.</li>
+  <li>FFI: Optimize/inline <tt>ffi.copy()</tt> and <tt>ffi.fill()</tt>.</li>
+  <li>FFI: Compile and optimize array/struct copies.</li>
+  <li>FFI: Compile <tt>ffi.typeof(cdata|ctype)</tt>, <tt>ffi.sizeof()</tt>, <tt>ffi.alignof()</tt>, <tt>ffi.offsetof()</tt> and <tt>ffi.gc()</tt>.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta10">LuaJIT 2.0.0-beta10 &mdash; 2012-05-09</h2>
+<ul>
+<li>New features:
+<ul>
+<li>The MIPS of LuaJIT is complete. It requires a CPU conforming to the
+MIPS32&nbsp;R1 architecture with hardware FPU. O32 hard-fp ABI,
+little-endian or big-endian.</li>
+<li>Auto-detect target arch via cross-compiler. No need for
+<tt>TARGET=arch</tt> anymore.</li>
+<li>Make DynASM compatible with Lua 5.2.</li>
+<li>From Lua 5.2: Try <tt>__tostring</tt> metamethod on non-string error
+messages..</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+<li>Fix parsing of hex literals with exponents.</li>
+<li>Fix bytecode dump for certain number constants.</li>
+<li>Fix argument type in error message for relative arguments.</li>
+<li>Fix argument error handling on Lua stacks without a frame.</li>
+<li>Add missing mcode limit check in assembler backend.</li>
+<li>Fix compilation on OpenBSD.</li>
+<li>Avoid recursive GC steps after GC-triggered trace exit.</li>
+<li>Replace <tt>&lt;unwind.h&gt;</tt> definitions with our own.</li>
+<li>Fix OSX build issues. Bump minimum required OSX version to 10.4.</li>
+<li>Fix discharge order of comparisons in Lua parser.</li>
+<li>Ensure running <tt>__gc</tt> of userdata created in <tt>__gc</tt>
+at state close.</li>
+<li>Limit number of userdata <tt>__gc</tt> separations at state close.</li>
+<li>Fix bytecode <tt>JMP</tt> slot range when optimizing
+<tt>and</tt>/<tt>or</tt> with constant LHS.</li>
+<li>Fix DSE of <tt>USTORE</tt>.</li>
+<li>Make <tt>lua_concat()</tt> work from C&nbsp;hook with partial frame.</li>
+<li>Add required PHIs for implicit conversions, e.g. via <tt>XREF</tt>
+forwarding.</li>
+<li>Add more comparison variants to Valgrind suppressions file.</li>
+<li>Disable loading bytecode with an extra header (BOM or <tt>#!</tt>).</li>
+<li>Fix PHI stack slot syncing.</li>
+<li>ARM: Reorder type/value tests to silence Valgrind.</li>
+<li>ARM: Fix register allocation for <tt>ldrd</tt>-optimized
+<tt>HREFK</tt>.</li>
+<li>ARM: Fix conditional branch fixup for <tt>OBAR</tt>.</li>
+<li>ARM: Invoke SPLIT pass for <tt>double</tt> args in FFI call.</li>
+<li>ARM: Handle all <tt>CALL*</tt> ops with <tt>double</tt> results in
+SPLIT pass.</li>
+<li>ARM: Fix rejoin of <tt>POW</tt> in SPLIT pass.</li>
+<li>ARM: Fix compilation of <tt>math.sinh</tt>, <tt>math.cosh</tt>,
+<tt>math.tanh</tt>.</li>
+<li>ARM, PPC: Avoid pointless arg clearing in <tt>BC_IFUNCF</tt>.</li>
+<li>PPC: Fix resume after yield from hook.</li>
+<li>PPC: Fix argument checking for <tt>rawget()</tt>.</li>
+<li>PPC: Fix fusion of floating-point <tt>XLOAD</tt>/<tt>XSTORE</tt>.</li>
+<li>PPC: Fix <tt>HREFK</tt> code generation for huge tables.</li>
+<li>PPC: Use builtin D-Cache/I-Cache sync code.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>Ignore empty statements in <tt>ffi.cdef()</tt>.</li>
+<li>Ignore number parsing errors while skipping definitions.</li>
+<li>Don't touch frame in callbacks with tailcalls to fast functions.</li>
+<li>Fix library unloading on POSIX systems.</li>
+<li>Finalize cdata before userdata when closing the state.</li>
+<li>Change <tt>ffi.load()</tt> library name resolution for Cygwin.</li>
+<li>Fix resolving of function name redirects on Windows/x86.</li>
+<li>Fix symbol resolving error messages on Windows.</li>
+<li>Fix blacklisting of C functions calling callbacks.</li>
+<li>Fix result type of pointer difference.</li>
+<li>Use correct PC in FFI metamethod error message.</li>
+<li>Allow <tt>'typedef _Bool int BOOL;'</tt> for the Windows API.</li>
+<li>Don't record test for bool result of call, if ignored.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta9">LuaJIT 2.0.0-beta9 &mdash; 2011-12-14</h2>
+<ul>
+<li>New features:
+<ul>
+<li>PPC port of LuaJIT is complete. Default is the dual-number port
+(usually faster). Single-number port selectable via <tt>src/Makefile</tt>
+at build time.</li>
+<li>Add FFI callback support.</li>
+<li>Extend <tt>-b</tt> to generate <tt>.c</tt>, <tt>.h</tt> or <tt>.obj/.o</tt>
+files with embedded bytecode.</li>
+<li>Allow loading embedded bytecode with <tt>require()</tt>.</li>
+<li>From Lua 5.2: Change to <tt>'\z'</tt> escape. Reject undefined escape
+sequences.</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+<li>Fix OSX 10.7 build. Fix <tt>install_name</tt> and versioning on OSX.</li>
+<li>Fix iOS build.</li>
+<li>Install <tt>dis_arm.lua</tt>, too.</li>
+<li>Mark installed shared library as executable.</li>
+<li>Add debug option to <tt>msvcbuild.bat</tt> and improve error handling.</li>
+<li>Fix data-flow analysis for iterators.</li>
+<li>Fix forced unwinding triggered by external unwinder.</li>
+<li>Record missing <tt>for</tt> loop slot loads (return to lower frame).</li>
+<li>Always use ANSI variants of Windows system functions.</li>
+<li>Fix GC barrier for multi-result table constructor (<tt>TSETM</tt>).</li>
+<li>Fix/add various FOLD rules.</li>
+<li>Add potential PHI for number conversions due to type instability.</li>
+<li>Do not eliminate PHIs only referenced from other PHIs.</li>
+<li>Correctly anchor implicit number to string conversions in Lua/C API.</li>
+<li>Fix various stack limit checks.</li>
+<li>x64: Use thread-safe exceptions for external unwinding (GCC platforms).</li>
+<li>x64: Fix result type of cdata index conversions.</li>
+<li>x64: Fix <tt>math.random()</tt> and <tt>bit.bswap()</tt> code generation.</li>
+<li>x64: Fix <tt>lightuserdata</tt> comparisons.</li>
+<li>x64: Always extend stack-passed arguments to pointer size.</li>
+<li>ARM: Many fixes to code generation backend.</li>
+<li>PPC/e500: Fix dispatch for binop metamethods.</li>
+<li>PPC/e500: Save/restore condition registers when entering/leaving the VM.</li>
+<li>PPC/e500: Fix write barrier in stores of strings to upvalues.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>Fix C comment parsing.</li>
+<li>Fix snapshot optimization for cdata comparisons.</li>
+<li>Fix recording of const/enum lookups in namespaces.</li>
+<li>Fix call argument and return handling for <tt>I8/U8/I16/U16</tt> types.</li>
+<li>Fix unfused loads of float fields.</li>
+<li>Fix <tt>ffi.string()</tt> recording.</li>
+<li>Save <tt>GetLastError()</tt> around <tt>ffi.load()</tt> and symbol
+resolving, too.</li>
+<li>Improve ld script detection in <tt>ffi.load()</tt>.</li>
+<li>Record loads/stores to external variables in namespaces.</li>
+<li>Compile calls to stdcall, fastcall and vararg functions.</li>
+<li>Treat function ctypes like pointers in comparisons.</li>
+<li>Resolve <tt>__call</tt> metamethod for pointers, too.</li>
+<li>Record C function calls with bool return values.</li>
+<li>Record <tt>ffi.errno()</tt>.</li>
+<li>x86: Fix number to <tt>uint32_t</tt> conversion rounding.</li>
+<li>x86: Fix 64 bit arithmetic in assembler backend.</li>
+<li>x64: Fix struct-by-value calling conventions.</li>
+<li>ARM: Ensure invocation of SPLIT pass for float conversions.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>Display trace types with <tt>-jv</tt> and <tt>-jdump</tt>.</li>
+<li>Record isolated calls. But prefer recording loops over calls.</li>
+<li>Specialize to prototype for non-monomorphic functions. Solves the
+trace-explosion problem for closure-heavy programming styles.</li>
+<li>Always generate a portable <tt>vmdef.lua</tt>. Easier for distros.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta8">LuaJIT 2.0.0-beta8 &mdash; 2011-06-23</h2>
+<ul>
+<li>New features:
+<ul>
+<li>Soft-float ARM port of LuaJIT is complete.</li>
+<li>Add support for bytecode loading/saving and <tt>-b</tt> command line
+option.</li>
+<li>From Lua 5.2: <tt>__len</tt> metamethod for tables
+(disabled by default).</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+<li>ARM: Misc. fixes for interpreter.</li>
+<li>x86/x64: Fix <tt>bit.*</tt> argument checking in interpreter.</li>
+<li>Catch early out-of-memory in memory allocator initialization.</li>
+<li>Fix data-flow analysis for paths leading to an upvalue close.</li>
+<li>Fix check for missing arguments in <tt>string.format()</tt>.</li>
+<li>Fix Solaris/x86 build (note: not a supported target).</li>
+<li>Fix recording of loops with instable directions in side traces.</li>
+<li>x86/x64: Fix fusion of comparisons with <tt>u8</tt>/<tt>u16</tt>
+<tt>XLOAD</tt>.</li>
+<li>x86/x64: Fix register allocation for variable shifts.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>Add <tt>ffi.errno()</tt>. Save <tt>errno</tt>/<tt>GetLastError()</tt>
+around allocations etc.</li>
+<li>Fix <tt>__gc</tt> for VLA/VLS cdata objects.</li>
+<li>Fix recording of casts from 32 bit cdata pointers to integers.</li>
+<li><tt>tonumber(cdata)</tt> returns <tt>nil</tt> for non-numbers.</li>
+<li>Show address pointed to for <tt>tostring(pointer)</tt>.</li>
+<li>Print <tt>NULL</tt> pointers as <tt>"cdata&lt;... *&gt;: NULL"</tt>.</li>
+<li>Support <tt>__tostring</tt> metamethod for pointers to structs, too.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>More tuning for loop unrolling heuristics.</li>
+<li>Flatten and compress in-memory debug info (saves ~70%).</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta7">LuaJIT 2.0.0-beta7 &mdash; 2011-05-05</h2>
+<ul>
+<li>New features:
+<ul>
+<li>ARM port of the LuaJIT interpreter is complete.</li>
+<li>FFI library: Add <tt>ffi.gc()</tt>, <tt>ffi.metatype()</tt>,
+<tt>ffi.istype()</tt>.</li>
+<li>FFI library: Resolve ld script redirection in <tt>ffi.load()</tt>.</li>
+<li>From Lua 5.2: <tt>package.searchpath()</tt>, <tt>fp:read("*L")</tt>,
+<tt>load(string)</tt>.</li>
+<li>From Lua 5.2, disabled by default: empty statement,
+<tt>table.unpack()</tt>, modified <tt>coroutine.running()</tt>.</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+<li>FFI library: numerous fixes.</li>
+<li>Fix type mismatches in store-to-load forwarding.</li>
+<li>Fix error handling within metamethods.</li>
+<li>Fix <tt>table.maxn()</tt>.</li>
+<li>Improve accuracy of <tt>x^-k</tt> on x64.</li>
+<li>Fix code generation for Intel Atom in x64 mode.</li>
+<li>Fix narrowing of POW.</li>
+<li>Fix recording of retried fast functions.</li>
+<li>Fix code generation for <tt>bit.bnot()</tt> and multiplies.</li>
+<li>Fix error location within cpcall frames.</li>
+<li>Add workaround for old libgcc unwind bug.</li>
+<li>Fix <tt>lua_yield()</tt> and <tt>getmetatable(lightuserdata)</tt> on x64.</li>
+<li>Misc. fixes for PPC/e500 interpreter.</li>
+<li>Fix stack slot updates for down-recursion.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>Add dual-number mode (int/double) for the VM. Enabled for ARM.</li>
+<li>Improve narrowing of arithmetic operators and <tt>for</tt> loops.</li>
+<li>Tune loop unrolling heuristics and increase trace recorder limits.</li>
+<li>Eliminate dead slots in snapshots using bytecode data-flow analysis.</li>
+<li>Avoid phantom stores to proxy tables.</li>
+<li>Optimize lookups in empty proxy tables.</li>
+<li>Improve bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta6">LuaJIT 2.0.0-beta6 &mdash; 2011-02-11</h2>
+<ul>
+<li>New features:
+<ul>
+<li>PowerPC/e500v2 port of the LuaJIT interpreter is complete.</li>
+<li>Various minor features from Lua 5.2: Hex escapes in literals,
+<tt>'\*'</tt> escape, reversible <tt>string.format("%q",s)</tt>,
+<tt>"%g"</tt> pattern, <tt>table.sort</tt> checks callbacks,
+<tt>os.exit(status|true|false[,close])</tt>.</li>
+<li>Lua 5.2 <tt>__pairs</tt> and <tt>__ipairs</tt> metamethods
+(disabled by default).</li>
+<li>Initial release of the FFI library.</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+<li>Fix <tt>string.format()</tt> for non-finite numbers.</li>
+<li>Fix memory leak when compiled to use the built-in allocator.</li>
+<li>x86/x64: Fix unnecessary resize in <tt>TSETM</tt> bytecode.</li>
+<li>Fix various GC issues with traces and <tt>jit.flush()</tt>.</li>
+<li>x64: Fix fusion of indexes for array references.</li>
+<li>x86/x64: Fix stack overflow handling for coroutine results.</li>
+<li>Enable low-2GB memory allocation on FreeBSD/x64.</li>
+<li>Fix <tt>collectgarbage("count")</tt> result if more than 2GB is in use.</li>
+<li>Fix parsing of hex floats.</li>
+<li>x86/x64: Fix loop branch inversion with trailing
+<tt>HREF+NE/EQ</tt>.</li>
+<li>Add <tt>jit.os</tt> string.</li>
+<li><tt>coroutine.create()</tt> permits running C functions, too.</li>
+<li>Fix OSX build to work with newer ld64 versions.</li>
+<li>Fix bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>Emit specialized bytecode for <tt>pairs()</tt>/<tt>next()</tt>.</li>
+<li>Improve bytecode coalescing of <tt>nil</tt> constants.</li>
+<li>Compile calls to vararg functions.</li>
+<li>Compile <tt>select()</tt>.</li>
+<li>Improve alias analysis, esp. for loads from allocations.</li>
+<li>Tuning of various compiler heuristics.</li>
+<li>Refactor and extend IR conversion instructions.</li>
+<li>x86/x64: Various backend enhancements related to the FFI.</li>
+<li>Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta5">LuaJIT 2.0.0-beta5 &mdash; 2010-08-24</h2>
+<ul>
+<li>Correctness and completeness:
+<ul>
+<li>Fix trace exit dispatch to function headers.</li>
+<li>Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.</li>
+<li>Reorganize and fix placement of generated machine code on x64.</li>
+<li>Fix TNEW in x64 interpreter.</li>
+<li>Do not eliminate PHIs for values only referenced from side exits.</li>
+<li>OS-independent canonicalization of strings for non-finite numbers.</li>
+<li>Fix <tt>string.char()</tt> range check on x64.</li>
+<li>Fix <tt>tostring()</tt> resolving within <tt>print()</tt>.</li>
+<li>Fix error handling for <tt>next()</tt>.</li>
+<li>Fix passing of constant arguments to external calls on x64.</li>
+<li>Fix interpreter argument check for two-argument SSE math functions.</li>
+<li>Fix C frame chain corruption caused by <tt>lua_cpcall()</tt>.</li>
+<li>Fix return from <tt>pcall()</tt> within active hook.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>Replace on-trace GC frame syncing with interpreter exit.</li>
+<li>Improve hash lookup specialization by not removing dead keys during GC.</li>
+<li>Turn traces into true GC objects.</li>
+<li>Avoid starting a GC cycle immediately after library init.</li>
+<li>Add weak guards to improve dead-code elimination.</li>
+<li>Speed up string interning.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta4">LuaJIT 2.0.0-beta4 &mdash; 2010-03-28</h2>
+<ul>
+<li>Correctness and completeness:
+<ul>
+<li>Fix precondition for on-trace creation of table keys.</li>
+<li>Fix <tt>{f()}</tt> on x64 when table is resized.</li>
+<li>Fix folding of ordered comparisons with same references.</li>
+<li>Fix snapshot restores for multi-result bytecodes.</li>
+<li>Fix potential hang when recording bytecode with nested closures.</li>
+<li>Fix recording of <tt>getmetatable()</tt>, <tt>tonumber()</tt> and bad argument types.</li>
+<li>Fix SLOAD fusion across returns to lower frames.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>Add array bounds check elimination. <tt>-Oabc</tt> is enabled by default.</li>
+<li>More tuning for x64, e.g. smaller table objects.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta3">LuaJIT 2.0.0-beta3 &mdash; 2010-03-07</h2>
+<ul>
+<li>LuaJIT x64 port:
+<ul>
+<li>Port integrated memory allocator to Linux/x64, Windows/x64 and OSX/x64.</li>
+<li>Port interpreter and JIT compiler to x64.</li>
+<li>Port DynASM to x64.</li>
+<li>Many 32/64 bit cleanups in the VM.</li>
+<li>Allow building the interpreter with either x87 or SSE2 arithmetics.</li>
+<li>Add external unwinding and C++ exception interop (default on x64).</li>
+</ul></li>
+<li>Correctness and completeness:
+<ul>
+<li>Fix constructor bytecode generation for certain conditional values.</li>
+<li>Fix some cases of ordered string comparisons.</li>
+<li>Fix <tt>lua_tocfunction()</tt>.</li>
+<li>Fix cutoff register in JMP bytecode for some conditional expressions.</li>
+<li>Fix PHI marking algorithm for references from variant slots.</li>
+<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li>
+<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li>
+<li>Drive the GC forward on string allocations in the parser.</li>
+<li>Implement call/return hooks (zero-cost if disabled).</li>
+<li>Implement yield from C hooks.</li>
+<li>Disable JIT compiler on older non-SSE2 CPUs instead of aborting.</li>
+</ul></li>
+<li>Structural and performance enhancements:
+<ul>
+<li>Compile recursive code (tail-, up- and down-recursion).</li>
+<li>Improve heuristics for bytecode penalties and blacklisting.</li>
+<li>Split CALL/FUNC recording and clean up fast function call semantics.</li>
+<li>Major redesign of internal function call handling.</li>
+<li>Improve FOR loop const specialization and integerness checks.</li>
+<li>Switch to pre-initialized stacks. Avoid frame-clearing.</li>
+<li>Colocation of prototypes and related data: bytecode, constants, debug info.</li>
+<li>Cleanup parser and streamline bytecode generation.</li>
+<li>Add support for weak IR references to register allocator.</li>
+<li>Switch to compressed, extensible snapshots.</li>
+<li>Compile returns to frames below the start frame.</li>
+<li>Improve alias analysis of upvalues using a disambiguation hash value.</li>
+<li>Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1 instructions.</li>
+<li>Add generic C call handling to IR and backend.</li>
+<li>Improve KNUM fuse vs. load heuristics.</li>
+<li>Compile various <tt>io.*()</tt> functions.</li>
+<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt>
+and <tt>math.random()</tt>.</li>
+</ul></li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 &mdash; 2009-11-09</h2>
+<ul>
+<li>Reorganize build system. Build static+shared library on POSIX.</li>
+<li>Allow C++ exception conversion on all platforms
+using a wrapper function.</li>
+<li>Automatically catch C++ exceptions and rethrow Lua error
+(DWARF2 only).</li>
+<li>Check for the correct x87 FPU precision at strategic points.</li>
+<li>Always use wrappers for libm functions.</li>
+<li>Resurrect metamethod name strings before copying them.</li>
+<li>Mark current trace, even if compiler is idle.</li>
+<li>Ensure FILE metatable is created only once.</li>
+<li>Fix type comparisons when different integer types are involved.</li>
+<li>Fix <tt>getmetatable()</tt> recording.</li>
+<li>Fix TDUP with dead keys in template table.</li>
+<li><tt>jit.flush(tr)</tt> returns status.
+Prevent manual flush of a trace that's still linked.</li>
+<li>Improve register allocation heuristics for invariant references.</li>
+<li>Compile the push/pop variants of <tt>table.insert()</tt> and
+<tt>table.remove()</tt>.</li>
+<li>Compatibility with MSVC <tt>link&nbsp/debug</tt>.</li>
+<li>Fix <tt>lua_iscfunction()</tt>.</li>
+<li>Fix <tt>math.random()</tt> when compiled with <tt>-fpic</tt> (OSX).</li>
+<li>Fix <tt>table.maxn()</tt>.</li>
+<li>Bump <tt>MACOSX_DEPLOYMENT_TARGET</tt> to <tt>10.4</tt></li>
+<li><tt>luaL_check*()</tt> and <tt>luaL_opt*()</tt> now support
+negative arguments, too.<br>
+This matches the behavior of Lua 5.1, but not the specification.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 &mdash; 2009-10-31</h2>
+<ul>
+<li>This is the first public release of LuaJIT 2.0.</li>
+<li>The whole VM has been rewritten from the ground up, so there's
+no point in listing differences over earlier versions.</li>
+</ul>
+</div>
+
+<div class="major" style="background: #ffff80;">
+<h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 &mdash; 2012-04-16</h2>
+<ul>
+<li>Merged with Lua 5.1.5. Also integrated fixes for all
+<a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">&raquo;</span>&nbsp;<span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.5</a>.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 &mdash; 2011-05-05</h2>
+<ul>
+<li>Added fixes for the
+<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 &mdash; 2010-03-28</h2>
+<ul>
+<li>Added fixes for the
+<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
+<li>Removed wrong GC check in <tt>jit_createstate()</tt>.
+Thanks to Tim Mensch.</li>
+<li>Fixed bad assertions while compiling <tt>table.insert()</tt> and
+<tt>table.remove()</tt>.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
+<ul>
+<li>Merged with Lua 5.1.4. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
+<ul>
+<li>Merged with Lua 5.1.3. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
+<li>Fixed possible (but unlikely) stack corruption while compiling
+<tt>k^x</tt> expressions.</li>
+<li>Fixed DynASM template for cmpss instruction.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
+<ul>
+<li>Merged with Lua 5.1.2. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
+<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
+<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
+<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
+<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
+<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
+<li>The loadable debug modules now handle redirection to stdout
+(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
+<ul>
+<li>Fix MSVC inline assembly: use only local variables with
+<tt>lua_number2int()</tt>.</li>
+<li>Fix "attempt to call a thread value" bug on Mac OS X:
+make values of consts used as lightuserdata keys unique
+to avoid joining by the compiler/linker.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
+<ul>
+<li>Merged with Lua 5.1.1. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
+<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
+<li>Minor changes to DynASM: faster pre-processing, smaller encoding
+for some immediates.</li>
+</ul>
+<p>
+This release is in sync with Coco 1.1.1 (see the
+<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
+</p>
+
+<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
+<ul>
+<li>Merged with Lua 5.1 (final).</li>
+
+<li>New JIT call frame setup:
+<ul>
+<li>The C stack is kept 16 byte aligned (faster).
+Mandatory for Mac OS X on Intel, too.</li>
+<li>Faster calling conventions for internal C helper functions.</li>
+<li>Better instruction scheduling for function prologue, OP_CALL and
+OP_RETURN.</li>
+</ul></li>
+
+<li>Miscellaneous optimizations:
+<ul>
+<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
+forwarding stalls.</li>
+<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
+and FP to integer conversions.</li>
+<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
+<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
+With better accuracy than the C variant, too.</li>
+<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
+use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
+</ul></li>
+
+<li>Changes in the optimizer:
+<ul>
+<li>Improved hinting for table keys derived from table values
+(<tt>t1[t2[x]]</tt>).</li>
+<li>Lookup hinting now works with arbitrary object types and
+supports index chains, too.</li>
+<li>Generate type hints for arithmetic and comparison operators,
+OP_LEN, OP_CONCAT and OP_FORPREP.</li>
+<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
+<li>Complete rewrite of <tt>jit.opt_inline</tt> module
+(ex <tt>jit.opt_lib</tt>).</li>
+</ul></li>
+
+<li>Use adaptive deoptimization:
+<ul>
+<li>If runtime verification of a contract fails, the affected
+instruction is recompiled and patched on-the-fly.
+Regular programs will trigger deoptimization only occasionally.</li>
+<li>This avoids generating code for uncommon fallback cases
+most of the time. Generated code is up to 30% smaller compared to
+LuaJIT&nbsp;1.0.3.</li>
+<li>Deoptimization is used for many opcodes and contracts:
+<ul>
+<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
+<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
+<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
+<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
+OP_FORPREP: operand type and range mismatches.</li>
+</ul></li>
+<li>Complete redesign of the debug and traceback info
+(bytecode &harr; mcode) to support deoptimization.
+Much more flexible and needs only 50% of the space.</li>
+<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
+<tt>jit.dump</tt> handle deoptimization.</li>
+</ul></li>
+
+<li>Inlined many popular library functions
+(for commonly used arguments only):
+<ul>
+<li>Most <tt>math.*</tt> functions (the 18 most used ones)
+[2x-10x faster].</li>
+<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
+[2x-10x faster].</li>
+<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
+[3x-5x faster].</li>
+<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
+[3x-5x faster].</li>
+<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
+[8x-15x faster].</li>
+</ul></li>
+
+<li>Changes in the core and loadable modules and the stand-alone executable:
+<ul>
+<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
+and <tt>jit.arch</tt>.</li>
+<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
+<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
+<li>New x86 disassembler module written in pure Lua. No dependency
+on ndisasm anymore. Flexible API, very compact (500 lines)
+and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
+<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
+on a separate line.</li>
+</ul></li>
+
+<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
+<li>Miscellaneous doc changes. Added a section about
+<a href="install.html#embedding">embedding LuaJIT</a>.</li>
+</ul>
+<p>
+This release is in sync with Coco 1.1.0 (see the
+<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
+</p>
+</div>
+
+<div class="major" style="background: #ffffd0;">
+<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
+<ul>
+<li>Even more docs.</li>
+<li>Unified closure checks in <tt>jit.*</tt>.</li>
+<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
+<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
+<li>Merged with Lua 5.1 alpha (including early bug fixes).</li>
+</ul>
+<p>
+This is the first public release of LuaJIT.
+</p>
+
+<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
+<ul>
+<li>Add support for flushing the Valgrind translation cache <br>
+(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
+<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
+variant for POSIX systems.</li>
+<li>Reorganized the C&nbsp;function signature handling in
+<tt>jit.opt_lib</tt>.</li>
+<li>Changed to index-based hints for inlining C&nbsp;functions.
+Still no support in the backend for inlining.</li>
+<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
+<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
+<li>Misc. changes to the Makefiles.</li>
+<li>Lots of new docs.</li>
+<li>Complete doc reorg.</li>
+</ul>
+<p>
+Not released because Lua 5.1 alpha came out today.
+</p>
+
+<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
+<ul>
+<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
+<li>Fix result handling for C &ndash;> JIT calls.</li>
+<li>Detect CPU feature bits.</li>
+<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
+<li>Add fallback instructions for FP compares.</li>
+<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
+<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
+(David Burgess).</li>
+<li>Misc. doc updates.</li>
+</ul>
+<p>
+Interim non-public release.
+Special thanks to Adam D. Moss for reporting most of the bugs.
+</p>
+
+<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
+<p>
+This is the initial non-public release of LuaJIT.
+</p>
+</div>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 102 - 0
luajit.mod/luajit/doc/contact.html

@@ -0,0 +1,102 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Contact</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Contact</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+Please send general questions to the
+<a href="http://luajit.org/list.html"><span class="ext">&raquo;</span>&nbsp;LuaJIT mailing list</a>.
+You can also send any questions you have directly to me:
+</p>
+
+<script type="text/javascript">
+<!--
+var xS="@-:\" .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<abc>defghijklmnopqrstuvwxyz";function xD(s)
+{var len=s.length;var r="";for(var i=0;i<len;i++)
+{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)c=xS.charAt(69-n);r+=c;}
+document.write("<"+"p>"+r+"<"+"/p>\n");}
+//-->
+</script>
+<script type="text/javascript">
+<!--
+xD("fyZKB8xv\"FJytmz8.KAB0u52D")
+//--></script>
+<noscript>
+<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13">
+</p>
+</noscript>
+
+<h2>Copyright</h2>
+<p>
+All documentation is
+Copyright &copy; 2005-2015 Mike Pall.
+</p>
+
+
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 187 - 0
luajit.mod/luajit/doc/ext_c_api.html

@@ -0,0 +1,187 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Lua/C API Extensions</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Lua/C API Extensions</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a class="current" href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT adds some extensions to the standard Lua/C API. The LuaJIT include
+directory must be in the compiler search path (<tt>-I<i>path</i></tt>)
+to be able to include the required header for C code:
+</p>
+<pre class="code">
+#include "luajit.h"
+</pre>
+<p>
+Or for C++ code:
+</p>
+<pre class="code">
+#include "lua.hpp"
+</pre>
+
+<h2 id="luaJIT_setmode"><tt>luaJIT_setmode(L, idx, mode)</tt>
+&mdash; Control VM</h2>
+<p>
+This is a C API extension to allow control of the VM from C code. The
+full prototype of <tt>LuaJIT_setmode</tt> is:
+</p>
+<pre class="code">
+LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
+</pre>
+<p>
+The returned status is either success (<tt>1</tt>) or failure (<tt>0</tt>).
+The second argument is either <tt>0</tt> or a stack index (similar to the
+other Lua/C API functions).
+</p>
+<p>
+The third argument specifies the mode, which is 'or'ed with a flag.
+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
+<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
+<tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
+</p>
+<p>
+The following modes are defined:
+</p>
+
+<h3 id="mode_engine"><tt>luaJIT_setmode(L, 0, LUAJIT_MODE_ENGINE|flag)</tt></h3>
+<p>
+Turn the whole JIT compiler on or off or flush the whole cache of compiled code.
+</p>
+
+<h3 id="mode_func"><tt>luaJIT_setmode(L, idx, LUAJIT_MODE_FUNC|flag)</tt><br>
+<tt>luaJIT_setmode(L, idx, LUAJIT_MODE_ALLFUNC|flag)</tt><br>
+<tt>luaJIT_setmode(L, idx, LUAJIT_MODE_ALLSUBFUNC|flag)</tt></h3>
+<p>
+This sets the mode for the function at the stack index <tt>idx</tt> or
+the parent of the calling function (<tt>idx = 0</tt>). It either
+enables JIT compilation for a function, disables it and flushes any
+already compiled code or only flushes already compiled code. This
+applies recursively to all sub-functions of the function with
+<tt>LUAJIT_MODE_ALLFUNC</tt> or only to the sub-functions with
+<tt>LUAJIT_MODE_ALLSUBFUNC</tt>.
+</p>
+
+<h3 id="mode_trace"><tt>luaJIT_setmode(L, trace,<br>
+&nbsp;&nbsp;LUAJIT_MODE_TRACE|LUAJIT_MODE_FLUSH)</tt></h3>
+<p>
+Flushes the specified root trace and all of its side traces from the cache.
+The code for the trace will be retained as long as there are any other
+traces which link to it.
+</p>
+
+<h3 id="mode_wrapcfunc"><tt>luaJIT_setmode(L, idx, LUAJIT_MODE_WRAPCFUNC|flag)</tt></h3>
+<p>
+This mode defines a wrapper function for calls to C functions. If
+called with <tt>LUAJIT_MODE_ON</tt>, the stack index at <tt>idx</tt>
+must be a <tt>lightuserdata</tt> object holding a pointer to the wrapper
+function. From now on all C functions are called through the wrapper
+function. If called with <tt>LUAJIT_MODE_OFF</tt> this mode is turned
+off and all C functions are directly called.
+</p>
+<p>
+The wrapper function can be used for debugging purposes or to catch
+and convert foreign exceptions. But please read the section on
+<a href="extensions.html#exceptions">C++&nbsp;exception interoperability</a>
+first. Recommended usage can be seen in this C++ code excerpt:
+</p>
+<pre class="code">
+#include &lt;exception&gt;
+#include "lua.hpp"
+
+// Catch C++ exceptions and convert them to Lua error messages.
+// Customize as needed for your own exception classes.
+static int wrap_exceptions(lua_State *L, lua_CFunction f)
+{
+  try {
+    return f(L);  // Call wrapped function and return result.
+  } catch (const char *s) {  // Catch and convert exceptions.
+    lua_pushstring(L, s);
+  } catch (std::exception& e) {
+    lua_pushstring(L, e.what());
+  } catch (...) {
+    lua_pushliteral(L, "caught (...)");
+  }
+  return lua_error(L);  // Rethrow as a Lua error.
+}
+
+static int myinit(lua_State *L)
+{
+  ...
+  // Define wrapper function and enable it.
+  lua_pushlightuserdata(L, (void *)wrap_exceptions);
+  luaJIT_setmode(L, -1, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_ON);
+  lua_pop(L, 1);
+  ...
+}
+</pre>
+<p>
+Note that you can only define <b>a single global wrapper function</b>,
+so be careful when using this mechanism from multiple C++ modules.
+Also note that this mechanism is not without overhead.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 330 - 0
luajit.mod/luajit/doc/ext_ffi.html

@@ -0,0 +1,330 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>FFI Library</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>FFI Library</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a class="current" href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+
+The FFI library allows <b>calling external C&nbsp;functions</b> and
+<b>using C&nbsp;data structures</b> from pure Lua code.
+
+</p>
+<p>
+
+The FFI library largely obviates the need to write tedious manual
+Lua/C bindings in C. No need to learn a separate binding language
+&mdash; <b>it parses plain C&nbsp;declarations!</b> These can be
+cut-n-pasted from C&nbsp;header files or reference manuals. It's up to
+the task of binding large libraries without the need for dealing with
+fragile binding generators.
+
+</p>
+<p>
+The FFI library is tightly integrated into LuaJIT (it's not available
+as a separate module). The code generated by the JIT-compiler for
+accesses to C&nbsp;data structures from Lua code is on par with the
+code a C&nbsp;compiler would generate. Calls to C&nbsp;functions can
+be inlined in JIT-compiled code, unlike calls to functions bound via
+the classic Lua/C API.
+</p>
+<p>
+This page gives a short introduction to the usage of the FFI library.
+<em>Please use the FFI sub-topics in the navigation bar to learn more.</em>
+</p>
+
+<h2 id="call">Motivating Example: Calling External C Functions</h2>
+<p>
+It's really easy to call an external C&nbsp;library function:
+</p>
+<pre class="code mark">
+<span class="codemark">&#9312;
+&#9313;
+
+
+&#9314;</span>local ffi = require("ffi")
+ffi.cdef[[
+<span style="color:#00a000;">int printf(const char *fmt, ...);</span>
+]]
+ffi.C.printf("Hello %s!", "world")
+</pre>
+<p>
+So, let's pick that apart:
+</p>
+<p>
+<span class="mark">&#9312;</span> Load the FFI library.
+</p>
+<p>
+<span class="mark">&#9313;</span> Add a C&nbsp;declaration
+for the function. The part inside the double-brackets (in green) is
+just standard C&nbsp;syntax.
+</p>
+<p>
+<span class="mark">&#9314;</span> Call the named
+C&nbsp;function &mdash; Yes, it's that simple!
+</p>
+<p style="font-size: 8pt;">
+Actually, what goes on behind the scenes is far from simple: <span
+style="color:#4040c0;">&#9314;</span> makes use of the standard
+C&nbsp;library namespace <tt>ffi.C</tt>. Indexing this namespace with
+a symbol name (<tt>"printf"</tt>) automatically binds it to the
+standard C&nbsp;library. The result is a special kind of object which,
+when called, runs the <tt>printf</tt> function. The arguments passed
+to this function are automatically converted from Lua objects to the
+corresponding C&nbsp;types.
+</p>
+<p>
+Ok, so maybe the use of <tt>printf()</tt> wasn't such a spectacular
+example. You could have done that with <tt>io.write()</tt> and
+<tt>string.format()</tt>, too. But you get the idea ...
+</p>
+<p>
+So here's something to pop up a message box on Windows:
+</p>
+<pre class="code">
+local ffi = require("ffi")
+ffi.cdef[[
+<span style="color:#00a000;">int MessageBoxA(void *w, const char *txt, const char *cap, int type);</span>
+]]
+ffi.C.MessageBoxA(nil, "Hello world!", "Test", 0)
+</pre>
+<p>
+Bing! Again, that was far too easy, no?
+</p>
+<p style="font-size: 8pt;">
+Compare this with the effort required to bind that function using the
+classic Lua/C API: create an extra C&nbsp;file, add a C&nbsp;function
+that retrieves and checks the argument types passed from Lua and calls
+the actual C&nbsp;function, add a list of module functions and their
+names, add a <tt>luaopen_*</tt> function and register all module
+functions, compile and link it into a shared library (DLL), move it to
+the proper path, add Lua code that loads the module aaaand ... finally
+call the binding function. Phew!
+</p>
+
+<h2 id="cdata">Motivating Example: Using C Data Structures</h2>
+<p>
+The FFI library allows you to create and access C&nbsp;data
+structures. Of course the main use for this is for interfacing with
+C&nbsp;functions. But they can be used stand-alone, too.
+</p>
+<p>
+Lua is built upon high-level data types. They are flexible, extensible
+and dynamic. That's why we all love Lua so much. Alas, this can be
+inefficient for certain tasks, where you'd really want a low-level
+data type. E.g. a large array of a fixed structure needs to be
+implemented with a big table holding lots of tiny tables. This imposes
+both a substantial memory overhead as well as a performance overhead.
+</p>
+<p>
+Here's a sketch of a library that operates on color images plus a
+simple benchmark. First, the plain Lua version:
+</p>
+<pre class="code">
+local floor = math.floor
+
+local function image_ramp_green(n)
+  local img = {}
+  local f = 255/(n-1)
+  for i=1,n do
+    img[i] = { red = 0, green = floor((i-1)*f), blue = 0, alpha = 255 }
+  end
+  return img
+end
+
+local function image_to_grey(img, n)
+  for i=1,n do
+    local y = floor(0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue)
+    img[i].red = y; img[i].green = y; img[i].blue = y
+  end
+end
+
+local N = 400*400
+local img = image_ramp_green(N)
+for i=1,1000 do
+  image_to_grey(img, N)
+end
+</pre>
+<p>
+This creates a table with 160.000 pixels, each of which is a table
+holding four number values in the range of 0-255. First an image with
+a green ramp is created (1D for simplicity), then the image is
+converted to greyscale 1000 times. Yes, that's silly, but I was in
+need of a simple example ...
+</p>
+<p>
+And here's the FFI version. The modified parts have been marked in
+bold:
+</p>
+<pre class="code mark">
+<span class="codemark">&#9312;
+
+
+
+
+
+&#9313;
+
+&#9314;
+&#9315;
+
+
+
+
+
+
+&#9314;
+&#9316;</span><b>local ffi = require("ffi")
+ffi.cdef[[
+</b><span style="color:#00a000;">typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;</span><b>
+]]</b>
+
+local function image_ramp_green(n)
+  <b>local img = ffi.new("rgba_pixel[?]", n)</b>
+  local f = 255/(n-1)
+  for i=<b>0,n-1</b> do
+    <b>img[i].green = i*f</b>
+    <b>img[i].alpha = 255</b>
+  end
+  return img
+end
+
+local function image_to_grey(img, n)
+  for i=<b>0,n-1</b> do
+    local y = <b>0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue</b>
+    img[i].red = y; img[i].green = y; img[i].blue = y
+  end
+end
+
+local N = 400*400
+local img = image_ramp_green(N)
+for i=1,1000 do
+  image_to_grey(img, N)
+end
+</pre>
+<p>
+Ok, so that wasn't too difficult:
+</p>
+<p>
+<span class="mark">&#9312;</span> First, load the FFI
+library and declare the low-level data type. Here we choose a
+<tt>struct</tt> which holds four byte fields, one for each component
+of a 4x8&nbsp;bit RGBA pixel.
+</p>
+<p>
+<span class="mark">&#9313;</span> Creating the data
+structure with <tt>ffi.new()</tt> is straightforward &mdash; the
+<tt>'?'</tt> is a placeholder for the number of elements of a
+variable-length array.
+</p>
+<p>
+<span class="mark">&#9314;</span> C&nbsp;arrays are
+zero-based, so the indexes have to run from <tt>0</tt> to
+<tt>n-1</tt>. One might want to allocate one more element instead to
+simplify converting legacy code.
+</p>
+<p>
+<span class="mark">&#9315;</span> Since <tt>ffi.new()</tt>
+zero-fills the array by default, we only need to set the green and the
+alpha fields.
+</p>
+<p>
+<span class="mark">&#9316;</span> The calls to
+<tt>math.floor()</tt> can be omitted here, because floating-point
+numbers are already truncated towards zero when converting them to an
+integer. This happens implicitly when the number is stored in the
+fields of each pixel.
+</p>
+<p>
+Now let's have a look at the impact of the changes: first, memory
+consumption for the image is down from 22&nbsp;Megabytes to
+640&nbsp;Kilobytes (400*400*4 bytes). That's a factor of 35x less! So,
+yes, tables do have a noticeable overhead. BTW: The original program
+would consume 40&nbsp;Megabytes in plain Lua (on x64).
+</p>
+<p>
+Next, performance: the pure Lua version runs in 9.57 seconds (52.9
+seconds with the Lua interpreter) and the FFI version runs in 0.48
+seconds on my machine (YMMV). That's a factor of 20x faster (110x
+faster than the Lua interpreter).
+</p>
+<p style="font-size: 8pt;">
+The avid reader may notice that converting the pure Lua version over
+to use array indexes for the colors (<tt>[1]</tt> instead of
+<tt>.red</tt>, <tt>[2]</tt> instead of <tt>.green</tt> etc.) ought to
+be more compact and faster. This is certainly true (by a factor of
+~1.7x). Switching to a struct-of-arrays would help, too.
+</p>
+<p style="font-size: 8pt;">
+However the resulting code would be less idiomatic and rather
+error-prone. And it still doesn't get even close to the performance of
+the FFI version of the code. Also, high-level data structures cannot
+be easily passed to other C&nbsp;functions, especially I/O functions,
+without undue conversion penalties.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 566 - 0
luajit.mod/luajit/doc/ext_ffi_api.html

@@ -0,0 +1,566 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>ffi.* API Functions</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.abitable { width: 30em; line-height: 1.2; }
+tr.abihead td { font-weight: bold; }
+td.abiparam { font-weight: bold; width: 6em; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1><tt>ffi.*</tt> API Functions</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a class="current" href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+This page describes the API functions provided by the FFI library in
+detail. It's recommended to read through the
+<a href="ext_ffi.html">introduction</a> and the
+<a href="ext_ffi_tutorial.html">FFI tutorial</a> first.
+</p>
+
+<h2 id="glossary">Glossary</h2>
+<ul>
+<li><b>cdecl</b> &mdash; An abstract C&nbsp;type declaration (a Lua
+string).</li>
+<li><b>ctype</b> &mdash; A C&nbsp;type object. This is a special kind of
+<b>cdata</b> returned by <tt>ffi.typeof()</tt>. It serves as a
+<b>cdata</b> <a href="#ffi_new">constructor</a> when called.</li>
+<li><b>cdata</b> &mdash; A C&nbsp;data object. It holds a value of the
+corresponding <b>ctype</b>.</li>
+<li><b>ct</b> &mdash; A C&nbsp;type specification which can be used for
+most of the API functions. Either a <b>cdecl</b>, a <b>ctype</b> or a
+<b>cdata</b> serving as a template type.</li>
+<li><b>cb</b> &mdash; A callback object. This is a C&nbsp;data object
+holding a special function pointer. Calling this function from
+C&nbsp;code runs an associated Lua function.</li>
+<li><b>VLA</b> &mdash; A variable-length array is declared with a
+<tt>?</tt> instead of the number of elements, e.g. <tt>"int[?]"</tt>.
+The number of elements (<tt>nelem</tt>) must be given when it's
+<a href="#ffi_new">created</a>.</li>
+<li><b>VLS</b> &mdash; A variable-length struct is a <tt>struct</tt> C
+type where the last element is a <b>VLA</b>. The same rules for
+declaration and creation apply.</li>
+</ul>
+
+<h2 id="decl">Declaring and Accessing External Symbols</h2>
+<p>
+External symbols must be declared first and can then be accessed by
+indexing a <a href="ext_ffi_semantics.html#clib">C&nbsp;library
+namespace</a>, which automatically binds the symbol to a specific
+library.
+</p>
+
+<h3 id="ffi_cdef"><tt>ffi.cdef(def)</tt></h3>
+<p>
+Adds multiple C&nbsp;declarations for types or external symbols (named
+variables or functions). <tt>def</tt> must be a Lua string. It's
+recommended to use the syntactic sugar for string arguments as
+follows:
+</p>
+<pre class="code">
+ffi.cdef[[
+<span style="color:#00a000;">typedef struct foo { int a, b; } foo_t;  // Declare a struct and typedef.
+int dofoo(foo_t *f, int n);  /* Declare an external C function. */</span>
+]]
+</pre>
+<p>
+The contents of the string (the part in green above) must be a
+sequence of
+<a href="ext_ffi_semantics.html#clang">C&nbsp;declarations</a>,
+separated by semicolons. The trailing semicolon for a single
+declaration may be omitted.
+</p>
+<p>
+Please note that external symbols are only <em>declared</em>, but they
+are <em>not bound</em> to any specific address, yet. Binding is
+achieved with C&nbsp;library namespaces (see below).
+</p>
+<p style="color: #c00000;">
+C&nbsp;declarations are not passed through a C&nbsp;pre-processor,
+yet. No pre-processor tokens are allowed, except for
+<tt>#pragma&nbsp;pack</tt>. Replace <tt>#define</tt> in existing
+C&nbsp;header files with <tt>enum</tt>, <tt>static&nbsp;const</tt>
+or <tt>typedef</tt> and/or pass the files through an external
+C&nbsp;pre-processor (once). Be careful not to include unneeded or
+redundant declarations from unrelated header files.
+</p>
+
+<h3 id="ffi_C"><tt>ffi.C</tt></h3>
+<p>
+This is the default C&nbsp;library namespace &mdash; note the
+uppercase <tt>'C'</tt>. It binds to the default set of symbols or
+libraries on the target system. These are more or less the same as a
+C&nbsp;compiler would offer by default, without specifying extra link
+libraries.
+</p>
+<p>
+On POSIX systems, this binds to symbols in the default or global
+namespace. This includes all exported symbols from the executable and
+any libraries loaded into the global namespace. This includes at least
+<tt>libc</tt>, <tt>libm</tt>, <tt>libdl</tt> (on Linux),
+<tt>libgcc</tt> (if compiled with GCC), as well as any exported
+symbols from the Lua/C&nbsp;API provided by LuaJIT itself.
+</p>
+<p>
+On Windows systems, this binds to symbols exported from the
+<tt>*.exe</tt>, the <tt>lua51.dll</tt> (i.e. the Lua/C&nbsp;API
+provided by LuaJIT itself), the C&nbsp;runtime library LuaJIT was linked
+with (<tt>msvcrt*.dll</tt>), <tt>kernel32.dll</tt>,
+<tt>user32.dll</tt> and <tt>gdi32.dll</tt>.
+</p>
+
+<h3 id="ffi_load"><tt>clib = ffi.load(name [,global])</tt></h3>
+<p>
+This loads the dynamic library given by <tt>name</tt> and returns
+a new C&nbsp;library namespace which binds to its symbols. On POSIX
+systems, if <tt>global</tt> is <tt>true</tt>, the library symbols are
+loaded into the global namespace, too.
+</p>
+<p>
+If <tt>name</tt> is a path, the library is loaded from this path.
+Otherwise <tt>name</tt> is canonicalized in a system-dependent way and
+searched in the default search path for dynamic libraries:
+</p>
+<p>
+On POSIX systems, if the name contains no dot, the extension
+<tt>.so</tt> is appended. Also, the <tt>lib</tt> prefix is prepended
+if necessary. So <tt>ffi.load("z")</tt> looks for <tt>"libz.so"</tt>
+in the default shared library search path.
+</p>
+<p>
+On Windows systems, if the name contains no dot, the extension
+<tt>.dll</tt> is appended. So <tt>ffi.load("ws2_32")</tt> looks for
+<tt>"ws2_32.dll"</tt> in the default DLL search path.
+</p>
+
+<h2 id="create">Creating cdata Objects</h2>
+<p>
+The following API functions create cdata objects (<tt>type()</tt>
+returns <tt>"cdata"</tt>). All created cdata objects are
+<a href="ext_ffi_semantics.html#gc">garbage collected</a>.
+</p>
+
+<h3 id="ffi_new"><tt>cdata = ffi.new(ct [,nelem] [,init...])<br>
+cdata = <em>ctype</em>([nelem,] [init...])</tt></h3>
+<p>
+Creates a cdata object for the given <tt>ct</tt>. VLA/VLS types
+require the <tt>nelem</tt> argument. The second syntax uses a ctype as
+a constructor and is otherwise fully equivalent.
+</p>
+<p>
+The cdata object is initialized according to the
+<a href="ext_ffi_semantics.html#init">rules for initializers</a>,
+using the optional <tt>init</tt> arguments. Excess initializers cause
+an error.
+</p>
+<p>
+Performance notice: if you want to create many objects of one kind,
+parse the cdecl only once and get its ctype with
+<tt>ffi.typeof()</tt>. Then use the ctype as a constructor repeatedly.
+</p>
+<p style="font-size: 8pt;">
+Please note that an anonymous <tt>struct</tt> declaration implicitly
+creates a new and distinguished ctype every time you use it for
+<tt>ffi.new()</tt>. This is probably <b>not</b> what you want,
+especially if you create more than one cdata object. Different anonymous
+<tt>structs</tt> are not considered assignment-compatible by the
+C&nbsp;standard, even though they may have the same fields! Also, they
+are considered different types by the JIT-compiler, which may cause an
+excessive number of traces. It's strongly suggested to either declare
+a named <tt>struct</tt> or <tt>typedef</tt> with <tt>ffi.cdef()</tt>
+or to create a single ctype object for an anonymous <tt>struct</tt>
+with <tt>ffi.typeof()</tt>.
+</p>
+
+<h3 id="ffi_typeof"><tt>ctype = ffi.typeof(ct)</tt></h3>
+<p>
+Creates a ctype object for the given <tt>ct</tt>.
+</p>
+<p>
+This function is especially useful to parse a cdecl only once and then
+use the resulting ctype object as a <a href="#ffi_new">constructor</a>.
+</p>
+
+<h3 id="ffi_cast"><tt>cdata = ffi.cast(ct, init)</tt></h3>
+<p>
+Creates a scalar cdata object for the given <tt>ct</tt>. The cdata
+object is initialized with <tt>init</tt> using the "cast" variant of
+the <a href="ext_ffi_semantics.html#convert">C&nbsp;type conversion
+rules</a>.
+</p>
+<p>
+This functions is mainly useful to override the pointer compatibility
+checks or to convert pointers to addresses or vice versa.
+</p>
+
+<h3 id="ffi_metatype"><tt>ctype = ffi.metatype(ct, metatable)</tt></h3>
+<p>
+Creates a ctype object for the given <tt>ct</tt> and associates it with
+a metatable. Only <tt>struct</tt>/<tt>union</tt> types, complex numbers
+and vectors are allowed. Other types may be wrapped in a
+<tt>struct</tt>, if needed.
+</p>
+<p>
+The association with a metatable is permanent and cannot be changed
+afterwards. Neither the contents of the <tt>metatable</tt> nor the
+contents of an <tt>__index</tt> table (if any) may be modified
+afterwards. The associated metatable automatically applies to all uses
+of this type, no matter how the objects are created or where they
+originate from. Note that pre-defined operations on types have
+precedence (e.g. declared field names cannot be overriden).
+</p>
+<p>
+All standard Lua metamethods are implemented. These are called directly,
+without shortcuts and on any mix of types. For binary operations, the
+left operand is checked first for a valid ctype metamethod. The
+<tt>__gc</tt> metamethod only applies to <tt>struct</tt>/<tt>union</tt>
+types and performs an implicit <a href="#ffi_gc"><tt>ffi.gc()</tt></a>
+call during creation of an instance.
+</p>
+
+<h3 id="ffi_gc"><tt>cdata = ffi.gc(cdata, finalizer)</tt></h3>
+<p>
+Associates a finalizer with a pointer or aggregate cdata object. The
+cdata object is returned unchanged.
+</p>
+<p>
+This function allows safe integration of unmanaged resources into the
+automatic memory management of the LuaJIT garbage collector. Typical
+usage:
+</p>
+<pre class="code">
+local p = ffi.gc(ffi.C.malloc(n), ffi.C.free)
+...
+p = nil -- Last reference to p is gone.
+-- GC will eventually run finalizer: ffi.C.free(p)
+</pre>
+<p>
+A cdata finalizer works like the <tt>__gc</tt> metamethod for userdata
+objects: when the last reference to a cdata object is gone, the
+associated finalizer is called with the cdata object as an argument. The
+finalizer can be a Lua function or a cdata function or cdata function
+pointer. An existing finalizer can be removed by setting a <tt>nil</tt>
+finalizer, e.g. right before explicitly deleting a resource:
+</p>
+<pre class="code">
+ffi.C.free(ffi.gc(p, nil)) -- Manually free the memory.
+</pre>
+
+<h2 id="info">C&nbsp;Type Information</h2>
+<p>
+The following API functions return information about C&nbsp;types.
+They are most useful for inspecting cdata objects.
+</p>
+
+<h3 id="ffi_sizeof"><tt>size = ffi.sizeof(ct [,nelem])</tt></h3>
+<p>
+Returns the size of <tt>ct</tt> in bytes. Returns <tt>nil</tt> if
+the size is not known (e.g. for <tt>"void"</tt> or function types).
+Requires <tt>nelem</tt> for VLA/VLS types, except for cdata objects.
+</p>
+
+<h3 id="ffi_alignof"><tt>align = ffi.alignof(ct)</tt></h3>
+<p>
+Returns the minimum required alignment for <tt>ct</tt> in bytes.
+</p>
+
+<h3 id="ffi_offsetof"><tt>ofs [,bpos,bsize] = ffi.offsetof(ct, field)</tt></h3>
+<p>
+Returns the offset (in bytes) of <tt>field</tt> relative to the start
+of <tt>ct</tt>, which must be a <tt>struct</tt>. Additionally returns
+the position and the field size (in bits) for bit fields.
+</p>
+
+<h3 id="ffi_istype"><tt>status = ffi.istype(ct, obj)</tt></h3>
+<p>
+Returns <tt>true</tt> if <tt>obj</tt> has the C&nbsp;type given by
+<tt>ct</tt>. Returns <tt>false</tt> otherwise.
+</p>
+<p>
+C&nbsp;type qualifiers (<tt>const</tt> etc.) are ignored. Pointers are
+checked with the standard pointer compatibility rules, but without any
+special treatment for <tt>void&nbsp;*</tt>. If <tt>ct</tt> specifies a
+<tt>struct</tt>/<tt>union</tt>, then a pointer to this type is accepted,
+too. Otherwise the types must match exactly.
+</p>
+<p>
+Note: this function accepts all kinds of Lua objects for the
+<tt>obj</tt> argument, but always returns <tt>false</tt> for non-cdata
+objects.
+</p>
+
+<h2 id="util">Utility Functions</h2>
+
+<h3 id="ffi_errno"><tt>err = ffi.errno([newerr])</tt></h3>
+<p>
+Returns the error number set by the last C&nbsp;function call which
+indicated an error condition. If the optional <tt>newerr</tt> argument
+is present, the error number is set to the new value and the previous
+value is returned.
+</p>
+<p>
+This function offers a portable and OS-independent way to get and set the
+error number. Note that only <em>some</em> C&nbsp;functions set the error
+number. And it's only significant if the function actually indicated an
+error condition (e.g. with a return value of <tt>-1</tt> or
+<tt>NULL</tt>). Otherwise, it may or may not contain any previously set
+value.
+</p>
+<p>
+You're advised to call this function only when needed and as close as
+possible after the return of the related C&nbsp;function. The
+<tt>errno</tt> value is preserved across hooks, memory allocations,
+invocations of the JIT compiler and other internal VM activity. The same
+applies to the value returned by <tt>GetLastError()</tt> on Windows, but
+you need to declare and call it yourself.
+</p>
+
+<h3 id="ffi_string"><tt>str = ffi.string(ptr [,len])</tt></h3>
+<p>
+Creates an interned Lua string from the data pointed to by
+<tt>ptr</tt>.
+</p>
+<p>
+If the optional argument <tt>len</tt> is missing, <tt>ptr</tt> is
+converted to a <tt>"char&nbsp;*"</tt> and the data is assumed to be
+zero-terminated. The length of the string is computed with
+<tt>strlen()</tt>.
+</p>
+<p>
+Otherwise <tt>ptr</tt> is converted to a <tt>"void&nbsp;*"</tt> and
+<tt>len</tt> gives the length of the data. The data may contain
+embedded zeros and need not be byte-oriented (though this may cause
+endianess issues).
+</p>
+<p>
+This function is mainly useful to convert (temporary)
+<tt>"const&nbsp;char&nbsp;*"</tt> pointers returned by
+C&nbsp;functions to Lua strings and store them or pass them to other
+functions expecting a Lua string. The Lua string is an (interned) copy
+of the data and bears no relation to the original data area anymore.
+Lua strings are 8&nbsp;bit clean and may be used to hold arbitrary,
+non-character data.
+</p>
+<p>
+Performance notice: it's faster to pass the length of the string, if
+it's known. E.g. when the length is returned by a C&nbsp;call like
+<tt>sprintf()</tt>.
+</p>
+
+<h3 id="ffi_copy"><tt>ffi.copy(dst, src, len)<br>
+ffi.copy(dst, str)</tt></h3>
+<p>
+Copies the data pointed to by <tt>src</tt> to <tt>dst</tt>.
+<tt>dst</tt> is converted to a <tt>"void&nbsp;*"</tt> and <tt>src</tt>
+is converted to a <tt>"const void&nbsp;*"</tt>.
+</p>
+<p>
+In the first syntax, <tt>len</tt> gives the number of bytes to copy.
+Caveat: if <tt>src</tt> is a Lua string, then <tt>len</tt> must not
+exceed <tt>#src+1</tt>.
+</p>
+<p>
+In the second syntax, the source of the copy must be a Lua string. All
+bytes of the string <em>plus a zero-terminator</em> are copied to
+<tt>dst</tt> (i.e. <tt>#src+1</tt> bytes).
+</p>
+<p>
+Performance notice: <tt>ffi.copy()</tt> may be used as a faster
+(inlinable) replacement for the C&nbsp;library functions
+<tt>memcpy()</tt>, <tt>strcpy()</tt> and <tt>strncpy()</tt>.
+</p>
+
+<h3 id="ffi_fill"><tt>ffi.fill(dst, len [,c])</tt></h3>
+<p>
+Fills the data pointed to by <tt>dst</tt> with <tt>len</tt> constant
+bytes, given by <tt>c</tt>. If <tt>c</tt> is omitted, the data is
+zero-filled.
+</p>
+<p>
+Performance notice: <tt>ffi.fill()</tt> may be used as a faster
+(inlinable) replacement for the C&nbsp;library function
+<tt>memset(dst,&nbsp;c,&nbsp;len)</tt>. Please note the different
+order of arguments!
+</p>
+
+<h2 id="target">Target-specific Information</h2>
+
+<h3 id="ffi_abi"><tt>status = ffi.abi(param)</tt></h3>
+<p>
+Returns <tt>true</tt> if <tt>param</tt> (a Lua string) applies for the
+target ABI (Application Binary Interface). Returns <tt>false</tt>
+otherwise. The following parameters are currently defined:
+</p>
+<table class="abitable">
+<tr class="abihead">
+<td class="abiparam">Parameter</td>
+<td class="abidesc">Description</td>
+</tr>
+<tr class="odd separate">
+<td class="abiparam">32bit</td><td class="abidesc">32 bit architecture</td></tr>
+<tr class="even">
+<td class="abiparam">64bit</td><td class="abidesc">64 bit architecture</td></tr>
+<tr class="odd separate">
+<td class="abiparam">le</td><td class="abidesc">Little-endian architecture</td></tr>
+<tr class="even">
+<td class="abiparam">be</td><td class="abidesc">Big-endian architecture</td></tr>
+<tr class="odd separate">
+<td class="abiparam">fpu</td><td class="abidesc">Target has a hardware FPU</td></tr>
+<tr class="even">
+<td class="abiparam">softfp</td><td class="abidesc">softfp calling conventions</td></tr>
+<tr class="odd">
+<td class="abiparam">hardfp</td><td class="abidesc">hardfp calling conventions</td></tr>
+<tr class="even separate">
+<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
+<tr class="odd">
+<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+</table>
+
+<h3 id="ffi_os"><tt>ffi.os</tt></h3>
+<p>
+Contains the target OS name. Same contents as
+<a href="ext_jit.html#jit_os"><tt>jit.os</tt></a>.
+</p>
+
+<h3 id="ffi_arch"><tt>ffi.arch</tt></h3>
+<p>
+Contains the target architecture name. Same contents as
+<a href="ext_jit.html#jit_arch"><tt>jit.arch</tt></a>.
+</p>
+
+<h2 id="callback">Methods for Callbacks</h2>
+<p>
+The C&nbsp;types for <a href="ext_ffi_semantics.html#callback">callbacks</a>
+have some extra methods:
+</p>
+
+<h3 id="callback_free"><tt>cb:free()</tt></h3>
+<p>
+Free the resources associated with a callback. The associated Lua
+function is unanchored and may be garbage collected. The callback
+function pointer is no longer valid and must not be called anymore
+(it may be reused by a subsequently created callback).
+</p>
+
+<h3 id="callback_set"><tt>cb:set(func)</tt></h3>
+<p>
+Associate a new Lua function with a callback. The C&nbsp;type of the
+callback and the callback function pointer are unchanged.
+</p>
+<p>
+This method is useful to dynamically switch the receiver of callbacks
+without creating a new callback each time and registering it again (e.g.
+with a GUI library).
+</p>
+
+<h2 id="extended">Extended Standard Library Functions</h2>
+<p>
+The following standard library functions have been extended to work
+with cdata objects:
+</p>
+
+<h3 id="tonumber"><tt>n = tonumber(cdata)</tt></h3>
+<p>
+Converts a number cdata object to a <tt>double</tt> and returns it as
+a Lua number. This is particularly useful for boxed 64&nbsp;bit
+integer values. Caveat: this conversion may incur a precision loss.
+</p>
+
+<h3 id="tostring"><tt>s = tostring(cdata)</tt></h3>
+<p>
+Returns a string representation of the value of 64&nbsp;bit integers
+(<tt><b>"</b>nnn<b>LL"</b></tt> or <tt><b>"</b>nnn<b>ULL"</b></tt>) or
+complex numbers (<tt><b>"</b>re&plusmn;im<b>i"</b></tt>). Otherwise
+returns a string representation of the C&nbsp;type of a ctype object
+(<tt><b>"ctype&lt;</b>type<b>&gt;"</b></tt>) or a cdata object
+(<tt><b>"cdata&lt;</b>type<b>&gt;:&nbsp;</b>address"</tt>), unless you
+override it with a <tt>__tostring</tt> metamethod (see
+<a href="#ffi_metatype"><tt>ffi.metatype()</tt></a>).
+</p>
+
+<h3 id="pairs"><tt>iter, obj, start = pairs(cdata)<br>
+iter, obj, start = ipairs(cdata)<br></tt></h3>
+<p>
+Calls the <tt>__pairs</tt> or <tt>__ipairs</tt> metamethod of the
+corresponding ctype.
+</p>
+
+<h2 id="literals">Extensions to the Lua Parser</h2>
+<p>
+The parser for Lua source code treats numeric literals with the
+suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
+integers. Case doesn't matter, but uppercase is recommended for
+readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
+(<tt>0x2aLL</tt>) literals.
+</p>
+<p>
+The imaginary part of complex numbers can be specified by suffixing
+number literals with <tt>i</tt> or <tt>I</tt>, e.g. <tt>12.5i</tt>.
+Caveat: you'll need to use <tt>1i</tt> to get an imaginary part with
+the value one, since <tt>i</tt> itself still refers to a variable
+named <tt>i</tt>.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 1245 - 0
luajit.mod/luajit/doc/ext_ffi_semantics.html

@@ -0,0 +1,1245 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>FFI Semantics</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.convtable { line-height: 1.2; }
+tr.convhead td { font-weight: bold; }
+td.convop { font-style: italic; width: 40%; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>FFI Semantics</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+This page describes the detailed semantics underlying the FFI library
+and its interaction with both Lua and C&nbsp;code.
+</p>
+<p>
+Given that the FFI library is designed to interface with C&nbsp;code
+and that declarations can be written in plain C&nbsp;syntax, <b>it
+closely follows the C&nbsp;language semantics</b>, wherever possible.
+Some minor concessions are needed for smoother interoperation with Lua
+language semantics.
+</p>
+<p>
+Please don't be overwhelmed by the contents of this page &mdash; this
+is a reference and you may need to consult it, if in doubt. It doesn't
+hurt to skim this page, but most of the semantics "just work" as you'd
+expect them to work. It should be straightforward to write
+applications using the LuaJIT FFI for developers with a C or C++
+background.
+</p>
+
+<h2 id="clang">C Language Support</h2>
+<p>
+The FFI library has a built-in C&nbsp;parser with a minimal memory
+footprint. It's used by the <a href="ext_ffi_api.html">ffi.* library
+functions</a> to declare C&nbsp;types or external symbols.
+</p>
+<p>
+It's only purpose is to parse C&nbsp;declarations, as found e.g. in
+C&nbsp;header files. Although it does evaluate constant expressions,
+it's <em>not</em> a C&nbsp;compiler. The body of <tt>inline</tt>
+C&nbsp;function definitions is simply ignored.
+</p>
+<p>
+Also, this is <em>not</em> a validating C&nbsp;parser. It expects and
+accepts correctly formed C&nbsp;declarations, but it may choose to
+ignore bad declarations or show rather generic error messages. If in
+doubt, please check the input against your favorite C&nbsp;compiler.
+</p>
+<p>
+The C&nbsp;parser complies to the <b>C99 language standard</b> plus
+the following extensions:
+</p>
+<ul>
+
+<li>The <tt>'\e'</tt> escape in character and string literals.</li>
+
+<li>The C99/C++ boolean type, declared with the keywords <tt>bool</tt>
+or <tt>_Bool</tt>.</li>
+
+<li>Complex numbers, declared with the keywords <tt>complex</tt> or
+<tt>_Complex</tt>.</li>
+
+<li>Two complex number types: <tt>complex</tt> (aka
+<tt>complex&nbsp;double</tt>) and <tt>complex&nbsp;float</tt>.</li>
+
+<li>Vector types, declared with the GCC <tt>mode</tt> or
+<tt>vector_size</tt> attribute.</li>
+
+<li>Unnamed ('transparent') <tt>struct</tt>/<tt>union</tt> fields
+inside a <tt>struct</tt>/<tt>union</tt>.</li>
+
+<li>Incomplete <tt>enum</tt> declarations, handled like incomplete
+<tt>struct</tt> declarations.</li>
+
+<li>Unnamed <tt>enum</tt> fields inside a
+<tt>struct</tt>/<tt>union</tt>. This is similar to a scoped C++
+<tt>enum</tt>, except that declared constants are visible in the
+global namespace, too.</li>
+
+<li>Scoped <tt>static&nbsp;const</tt> declarations inside a
+<tt>struct</tt>/<tt>union</tt> (from C++).</li>
+
+<li>Zero-length arrays (<tt>[0]</tt>), empty
+<tt>struct</tt>/<tt>union</tt>, variable-length arrays (VLA,
+<tt>[?]</tt>) and variable-length structs (VLS, with a trailing
+VLA).</li>
+
+<li>C++ reference types (<tt>int&nbsp;&amp;x</tt>).</li>
+
+<li>Alternate GCC keywords with '<tt>__</tt>', e.g.
+<tt>__const__</tt>.</li>
+
+<li>GCC <tt>__attribute__</tt> with the following attributes:
+<tt>aligned</tt>, <tt>packed</tt>, <tt>mode</tt>,
+<tt>vector_size</tt>, <tt>cdecl</tt>, <tt>fastcall</tt>,
+<tt>stdcall</tt>, <tt>thiscall</tt>.</li>
+
+<li>The GCC <tt>__extension__</tt> keyword and the GCC
+<tt>__alignof__</tt> operator.</li>
+
+<li>GCC <tt>__asm__("symname")</tt> symbol name redirection for
+function declarations.</li>
+
+<li>MSVC keywords for fixed-length types: <tt>__int8</tt>,
+<tt>__int16</tt>, <tt>__int32</tt> and <tt>__int64</tt>.</li>
+
+<li>MSVC <tt>__cdecl</tt>, <tt>__fastcall</tt>, <tt>__stdcall</tt>,
+<tt>__thiscall</tt>, <tt>__ptr32</tt>, <tt>__ptr64</tt>,
+<tt>__declspec(align(n))</tt> and <tt>#pragma&nbsp;pack</tt>.</li>
+
+<li>All other GCC/MSVC-specific attributes are ignored.</li>
+
+</ul>
+<p>
+The following C&nbsp;types are pre-defined by the C&nbsp;parser (like
+a <tt>typedef</tt>, except re-declarations will be ignored):
+</p>
+<ul>
+
+<li>Vararg handling: <tt>va_list</tt>, <tt>__builtin_va_list</tt>,
+<tt>__gnuc_va_list</tt>.</li>
+
+<li>From <tt>&lt;stddef.h&gt;</tt>: <tt>ptrdiff_t</tt>,
+<tt>size_t</tt>, <tt>wchar_t</tt>.</li>
+
+<li>From <tt>&lt;stdint.h&gt;</tt>: <tt>int8_t</tt>, <tt>int16_t</tt>,
+<tt>int32_t</tt>, <tt>int64_t</tt>, <tt>uint8_t</tt>,
+<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
+<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
+
+</ul>
+<p>
+You're encouraged to use these types in preference to
+compiler-specific extensions or target-dependent standard types.
+E.g. <tt>char</tt> differs in signedness and <tt>long</tt> differs in
+size, depending on the target architecture and platform ABI.
+</p>
+<p>
+The following C&nbsp;features are <b>not</b> supported:
+</p>
+<ul>
+
+<li>A declaration must always have a type specifier; it doesn't
+default to an <tt>int</tt> type.</li>
+
+<li>Old-style empty function declarations (K&amp;R) are not allowed.
+All C&nbsp;functions must have a proper prototype declaration. A
+function declared without parameters (<tt>int&nbsp;foo();</tt>) is
+treated as a function taking zero arguments, like in C++.</li>
+
+<li>The <tt>long double</tt> C&nbsp;type is parsed correctly, but
+there's no support for the related conversions, accesses or arithmetic
+operations.</li>
+
+<li>Wide character strings and character literals are not
+supported.</li>
+
+<li><a href="#status">See below</a> for features that are currently
+not implemented.</li>
+
+</ul>
+
+<h2 id="convert">C Type Conversion Rules</h2>
+
+<h3 id="convert_tolua">Conversions from C&nbsp;types to Lua objects</h3>
+<p>
+These conversion rules apply for <em>read accesses</em> to
+C&nbsp;types: indexing pointers, arrays or
+<tt>struct</tt>/<tt>union</tt> types; reading external variables or
+constant values; retrieving return values from C&nbsp;calls:
+</p>
+<table class="convtable">
+<tr class="convhead">
+<td class="convin">Input</td>
+<td class="convop">Conversion</td>
+<td class="convout">Output</td>
+</tr>
+<tr class="odd separate">
+<td class="convin"><tt>int8_t</tt>, <tt>int16_t</tt></td><td class="convop">&rarr;<sup>sign-ext</sup> <tt>int32_t</tt> &rarr; <tt>double</tt></td><td class="convout">number</td></tr>
+<tr class="even">
+<td class="convin"><tt>uint8_t</tt>, <tt>uint16_t</tt></td><td class="convop">&rarr;<sup>zero-ext</sup> <tt>int32_t</tt> &rarr; <tt>double</tt></td><td class="convout">number</td></tr>
+<tr class="odd">
+<td class="convin"><tt>int32_t</tt>, <tt>uint32_t</tt></td><td class="convop">&rarr; <tt>double</tt></td><td class="convout">number</td></tr>
+<tr class="even">
+<td class="convin"><tt>int64_t</tt>, <tt>uint64_t</tt></td><td class="convop">boxed value</td><td class="convout">64 bit int cdata</td></tr>
+<tr class="odd separate">
+<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr; <tt>double</tt></td><td class="convout">number</td></tr>
+<tr class="even separate">
+<td class="convin"><tt>bool</tt></td><td class="convop">0 &rarr; <tt>false</tt>, otherwise <tt>true</tt></td><td class="convout">boolean</td></tr>
+<tr class="odd separate">
+<td class="convin"><tt>enum</tt></td><td class="convop">boxed value</td><td class="convout">enum cdata</td></tr>
+<tr class="even">
+<td class="convin">Complex number</td><td class="convop">boxed value</td><td class="convout">complex cdata</td></tr>
+<tr class="odd">
+<td class="convin">Vector</td><td class="convop">boxed value</td><td class="convout">vector cdata</td></tr>
+<tr class="even">
+<td class="convin">Pointer</td><td class="convop">boxed value</td><td class="convout">pointer cdata</td></tr>
+<tr class="odd separate">
+<td class="convin">Array</td><td class="convop">boxed reference</td><td class="convout">reference cdata</td></tr>
+<tr class="even">
+<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">boxed reference</td><td class="convout">reference cdata</td></tr>
+</table>
+<p>
+Bitfields are treated like their underlying type.
+</p>
+<p>
+Reference types are dereferenced <em>before</em> a conversion can take
+place &mdash; the conversion is applied to the C&nbsp;type pointed to
+by the reference.
+</p>
+
+<h3 id="convert_fromlua">Conversions from Lua objects to C&nbsp;types</h3>
+<p>
+These conversion rules apply for <em>write accesses</em> to
+C&nbsp;types: indexing pointers, arrays or
+<tt>struct</tt>/<tt>union</tt> types; initializing cdata objects;
+casts to C&nbsp;types; writing to external variables; passing
+arguments to C&nbsp;calls:
+</p>
+<table class="convtable">
+<tr class="convhead">
+<td class="convin">Input</td>
+<td class="convop">Conversion</td>
+<td class="convout">Output</td>
+</tr>
+<tr class="odd separate">
+<td class="convin">number</td><td class="convop">&rarr;</td><td class="convout"><tt>double</tt></td></tr>
+<tr class="even">
+<td class="convin">boolean</td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout"><tt>bool</tt></td></tr>
+<tr class="odd separate">
+<td class="convin">nil</td><td class="convop"><tt>NULL</tt> &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="even">
+<td class="convin">lightuserdata</td><td class="convop">lightuserdata address &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="odd">
+<td class="convin">userdata</td><td class="convop">userdata payload &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="even">
+<td class="convin">io.* file</td><td class="convop">get FILE * handle &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="odd separate">
+<td class="convin">string</td><td class="convop">match against <tt>enum</tt> constant</td><td class="convout"><tt>enum</tt></td></tr>
+<tr class="even">
+<td class="convin">string</td><td class="convop">copy string data + zero-byte</td><td class="convout"><tt>int8_t[]</tt>, <tt>uint8_t[]</tt></td></tr>
+<tr class="odd">
+<td class="convin">string</td><td class="convop">string data &rarr;</td><td class="convout"><tt>const char[]</tt></td></tr>
+<tr class="even separate">
+<td class="convin">function</td><td class="convop"><a href="#callback">create callback</a> &rarr;</td><td class="convout">C function type</td></tr>
+<tr class="odd separate">
+<td class="convin">table</td><td class="convop"><a href="#init_table">table initializer</a></td><td class="convout">Array</td></tr>
+<tr class="even">
+<td class="convin">table</td><td class="convop"><a href="#init_table">table initializer</a></td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr>
+<tr class="odd separate">
+<td class="convin">cdata</td><td class="convop">cdata payload &rarr;</td><td class="convout">C type</td></tr>
+</table>
+<p>
+If the result type of this conversion doesn't match the
+C&nbsp;type of the destination, the
+<a href="#convert_between">conversion rules between C&nbsp;types</a>
+are applied.
+</p>
+<p>
+Reference types are immutable after initialization ("no re-seating of
+references"). For initialization purposes or when passing values to
+reference parameters, they are treated like pointers. Note that unlike
+in C++, there's no way to implement automatic reference generation of
+variables under the Lua language semantics. If you want to call a
+function with a reference parameter, you need to explicitly pass a
+one-element array.
+</p>
+
+<h3 id="convert_between">Conversions between C&nbsp;types</h3>
+<p>
+These conversion rules are more or less the same as the standard
+C&nbsp;conversion rules. Some rules only apply to casts, or require
+pointer or type compatibility:
+</p>
+<table class="convtable">
+<tr class="convhead">
+<td class="convin">Input</td>
+<td class="convop">Conversion</td>
+<td class="convout">Output</td>
+</tr>
+<tr class="odd separate">
+<td class="convin">Signed integer</td><td class="convop">&rarr;<sup>narrow or sign-extend</sup></td><td class="convout">Integer</td></tr>
+<tr class="even">
+<td class="convin">Unsigned integer</td><td class="convop">&rarr;<sup>narrow or zero-extend</sup></td><td class="convout">Integer</td></tr>
+<tr class="odd">
+<td class="convin">Integer</td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr>
+<tr class="even">
+<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> <tt>int32_t</tt> &rarr;<sup>narrow</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt></td></tr>
+<tr class="odd">
+<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup></td><td class="convout"><tt>(u)int32_t</tt>, <tt>(u)int64_t</tt></td></tr>
+<tr class="even">
+<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr>
+<tr class="odd separate">
+<td class="convin">Number</td><td class="convop">n == 0 &rarr; 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr>
+<tr class="even">
+<td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout">Number</td></tr>
+<tr class="odd separate">
+<td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr>
+<tr class="even">
+<td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr>
+<tr class="odd">
+<td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr>
+<tr class="even separate">
+<td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr>
+<tr class="odd">
+<td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr>
+<tr class="even separate">
+<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
+<tr class="odd">
+<td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
+<tr class="even">
+<td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr>
+<tr class="odd separate">
+<td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr>
+<tr class="even">
+<td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr>
+<tr class="odd">
+<td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr>
+<tr class="even">
+<td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr>
+<tr class="odd separate">
+<td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr>
+<tr class="even">
+<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr>
+</table>
+<p>
+Bitfields or <tt>enum</tt> types are treated like their underlying
+type.
+</p>
+<p>
+Conversions not listed above will raise an error. E.g. it's not
+possible to convert a pointer to a complex number or vice versa.
+</p>
+
+<h3 id="convert_vararg">Conversions for vararg C&nbsp;function arguments</h3>
+<p>
+The following default conversion rules apply when passing Lua objects
+to the variable argument part of vararg C&nbsp;functions:
+</p>
+<table class="convtable">
+<tr class="convhead">
+<td class="convin">Input</td>
+<td class="convop">Conversion</td>
+<td class="convout">Output</td>
+</tr>
+<tr class="odd separate">
+<td class="convin">number</td><td class="convop">&rarr;</td><td class="convout"><tt>double</tt></td></tr>
+<tr class="even">
+<td class="convin">boolean</td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout"><tt>bool</tt></td></tr>
+<tr class="odd separate">
+<td class="convin">nil</td><td class="convop"><tt>NULL</tt> &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="even">
+<td class="convin">userdata</td><td class="convop">userdata payload &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="odd">
+<td class="convin">lightuserdata</td><td class="convop">lightuserdata address &rarr;</td><td class="convout"><tt>(void *)</tt></td></tr>
+<tr class="even separate">
+<td class="convin">string</td><td class="convop">string data &rarr;</td><td class="convout"><tt>const char *</tt></td></tr>
+<tr class="odd separate">
+<td class="convin"><tt>float</tt> cdata</td><td class="convop">&rarr;</td><td class="convout"><tt>double</tt></td></tr>
+<tr class="even">
+<td class="convin">Array cdata</td><td class="convop">take base address</td><td class="convout">Element pointer</td></tr>
+<tr class="odd">
+<td class="convin"><tt>struct</tt>/<tt>union</tt> cdata</td><td class="convop">take base address</td><td class="convout"><tt>struct</tt>/<tt>union</tt> pointer</td></tr>
+<tr class="even">
+<td class="convin">Function cdata</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr>
+<tr class="odd">
+<td class="convin">Any other cdata</td><td class="convop">no conversion</td><td class="convout">C type</td></tr>
+</table>
+<p>
+To pass a Lua object, other than a cdata object, as a specific type,
+you need to override the conversion rules: create a temporary cdata
+object with a constructor or a cast and initialize it with the value
+to pass:
+</p>
+<p>
+Assuming <tt>x</tt> is a Lua number, here's how to pass it as an
+integer to a vararg function:
+</p>
+<pre class="code">
+ffi.cdef[[
+int printf(const char *fmt, ...);
+]]
+ffi.C.printf("integer value: %d\n", ffi.new("int", x))
+</pre>
+<p>
+If you don't do this, the default Lua number &rarr; <tt>double</tt>
+conversion rule applies. A vararg C&nbsp;function expecting an integer
+will see a garbled or uninitialized value.
+</p>
+
+<h2 id="init">Initializers</h2>
+<p>
+Creating a cdata object with
+<a href="ext_ffi_api.html#ffi_new"><tt>ffi.new()</tt></a> or the
+equivalent constructor syntax always initializes its contents, too.
+Different rules apply, depending on the number of optional
+initializers and the C&nbsp;types involved:
+</p>
+<ul>
+<li>If no initializers are given, the object is filled with zero bytes.</li>
+
+<li>Scalar types (numbers and pointers) accept a single initializer.
+The Lua object is <a href="#convert_fromlua">converted to the scalar
+C&nbsp;type</a>.</li>
+
+<li>Valarrays (complex numbers and vectors) are treated like scalars
+when a single initializer is given. Otherwise they are treated like
+regular arrays.</li>
+
+<li>Aggregate types (arrays and structs) accept either a single cdata
+initializer of the same type (copy constructor), a single
+<a href="#init_table">table initializer</a>, or a flat list of
+initializers.</li>
+
+<li>The elements of an array are initialized, starting at index zero.
+If a single initializer is given for an array, it's repeated for all
+remaining elements. This doesn't happen if two or more initializers
+are given: all remaining uninitialized elements are filled with zero
+bytes.</li>
+
+<li>Byte arrays may also be initialized with a Lua string. This copies
+the whole string plus a terminating zero-byte. The copy stops early only
+if the array has a known, fixed size.</li>
+
+<li>The fields of a <tt>struct</tt> are initialized in the order of
+their declaration. Uninitialized fields are filled with zero
+bytes.</li>
+
+<li>Only the first field of a <tt>union</tt> can be initialized with a
+flat initializer.</li>
+
+<li>Elements or fields which are aggregates themselves are initialized
+with a <em>single</em> initializer, but this may be a table
+initializer or a compatible aggregate.</li>
+
+<li>Excess initializers cause an error.</li>
+
+</ul>
+
+<h2 id="init_table">Table Initializers</h2>
+<p>
+The following rules apply if a Lua table is used to initialize an
+Array or a <tt>struct</tt>/<tt>union</tt>:
+</p>
+<ul>
+
+<li>If the table index <tt>[0]</tt> is non-<tt>nil</tt>, then the
+table is assumed to be zero-based. Otherwise it's assumed to be
+one-based.</li>
+
+<li>Array elements, starting at index zero, are initialized one-by-one
+with the consecutive table elements, starting at either index
+<tt>[0]</tt> or <tt>[1]</tt>. This process stops at the first
+<tt>nil</tt> table element.</li>
+
+<li>If exactly one array element was initialized, it's repeated for
+all the remaining elements. Otherwise all remaining uninitialized
+elements are filled with zero bytes.</li>
+
+<li>The above logic only applies to arrays with a known fixed size.
+A VLA is only initialized with the element(s) given in the table.
+Depending on the use case, you may need to explicitly add a
+<tt>NULL</tt> or <tt>0</tt> terminator to a VLA.</li>
+
+<li>A <tt>struct</tt>/<tt>union</tt> can be initialized in the
+order of the declaration of its fields. Each field is initialized with
+consecutive table elements, starting at either index <tt>[0]</tt>
+or <tt>[1]</tt>. This process stops at the first <tt>nil</tt> table
+element.</li>
+
+<li>Otherwise, if neither index <tt>[0]</tt> nor <tt>[1]</tt> is present,
+a <tt>struct</tt>/<tt>union</tt> is initialized by looking up each field
+name (as a string key) in the table. Each non-<tt>nil</tt> value is
+used to initialize the corresponding field.</li>
+
+<li>Uninitialized fields of a <tt>struct</tt> are filled with zero
+bytes, except for the trailing VLA of a VLS.</li>
+
+<li>Initialization of a <tt>union</tt> stops after one field has been
+initialized. If no field has been initialized, the <tt>union</tt> is
+filled with zero bytes.</li>
+
+<li>Elements or fields which are aggregates themselves are initialized
+with a <em>single</em> initializer, but this may be a nested table
+initializer (or a compatible aggregate).</li>
+
+<li>Excess initializers for an array cause an error. Excess
+initializers for a <tt>struct</tt>/<tt>union</tt> are ignored.
+Unrelated table entries are ignored, too.</li>
+
+</ul>
+<p>
+Example:
+</p>
+<pre class="code">
+local ffi = require("ffi")
+
+ffi.cdef[[
+struct foo { int a, b; };
+union bar { int i; double d; };
+struct nested { int x; struct foo y; };
+]]
+
+ffi.new("int[3]", {})            --> 0, 0, 0
+ffi.new("int[3]", {1})           --> 1, 1, 1
+ffi.new("int[3]", {1,2})         --> 1, 2, 0
+ffi.new("int[3]", {1,2,3})       --> 1, 2, 3
+ffi.new("int[3]", {[0]=1})       --> 1, 1, 1
+ffi.new("int[3]", {[0]=1,2})     --> 1, 2, 0
+ffi.new("int[3]", {[0]=1,2,3})   --> 1, 2, 3
+ffi.new("int[3]", {[0]=1,2,3,4}) --> error: too many initializers
+
+ffi.new("struct foo", {})            --> a = 0, b = 0
+ffi.new("struct foo", {1})           --> a = 1, b = 0
+ffi.new("struct foo", {1,2})         --> a = 1, b = 2
+ffi.new("struct foo", {[0]=1,2})     --> a = 1, b = 2
+ffi.new("struct foo", {b=2})         --> a = 0, b = 2
+ffi.new("struct foo", {a=1,b=2,c=3}) --> a = 1, b = 2  'c' is ignored
+
+ffi.new("union bar", {})        --> i = 0, d = 0.0
+ffi.new("union bar", {1})       --> i = 1, d = ?
+ffi.new("union bar", {[0]=1,2}) --> i = 1, d = ?    '2' is ignored
+ffi.new("union bar", {d=2})     --> i = ?, d = 2.0
+
+ffi.new("struct nested", {1,{2,3}})     --> x = 1, y.a = 2, y.b = 3
+ffi.new("struct nested", {x=1,y={2,3}}) --> x = 1, y.a = 2, y.b = 3
+</pre>
+
+<h2 id="cdata_ops">Operations on cdata Objects</h2>
+<p>
+All of the standard Lua operators can be applied to cdata objects or a
+mix of a cdata object and another Lua object. The following list shows
+the pre-defined operations.
+</p>
+<p>
+Reference types are dereferenced <em>before</em> performing each of
+the operations below &mdash; the operation is applied to the
+C&nbsp;type pointed to by the reference.
+</p>
+<p>
+The pre-defined operations are always tried first before deferring to a
+metamethod or index table (if any) for the corresponding ctype (except
+for <tt>__new</tt>). An error is raised if the metamethod lookup or
+index table lookup fails.
+</p>
+
+<h3 id="cdata_array">Indexing a cdata object</h3>
+<ul>
+
+<li><b>Indexing a pointer/array</b>: a cdata pointer/array can be
+indexed by a cdata number or a Lua number. The element address is
+computed as the base address plus the number value multiplied by the
+element size in bytes. A read access loads the element value and
+<a href="#convert_tolua">converts it to a Lua object</a>. A write
+access <a href="#convert_fromlua">converts a Lua object to the element
+type</a> and stores the converted value to the element. An error is
+raised if the element size is undefined or a write access to a
+constant element is attempted.</li>
+
+<li><b>Dereferencing a <tt>struct</tt>/<tt>union</tt> field</b>: a
+cdata <tt>struct</tt>/<tt>union</tt> or a pointer to a
+<tt>struct</tt>/<tt>union</tt> can be dereferenced by a string key,
+giving the field name. The field address is computed as the base
+address plus the relative offset of the field. A read access loads the
+field value and <a href="#convert_tolua">converts it to a Lua
+object</a>. A write access <a href="#convert_fromlua">converts a Lua
+object to the field type</a> and stores the converted value to the
+field. An error is raised if a write access to a constant
+<tt>struct</tt>/<tt>union</tt> or a constant field is attempted.
+Scoped enum constants or static constants are treated like a constant
+field.</li>
+
+<li><b>Indexing a complex number</b>: a complex number can be indexed
+either by a cdata number or a Lua number with the values 0 or 1, or by
+the strings <tt>"re"</tt> or <tt>"im"</tt>. A read access loads the
+real part (<tt>[0]</tt>, <tt>.re</tt>) or the imaginary part
+(<tt>[1]</tt>, <tt>.im</tt>) part of a complex number and
+<a href="#convert_tolua">converts it to a Lua number</a>. The
+sub-parts of a complex number are immutable &mdash; assigning to an
+index of a complex number raises an error. Accessing out-of-bound
+indexes returns unspecified results, but is guaranteed not to trigger
+memory access violations.</li>
+
+<li><b>Indexing a vector</b>: a vector is treated like an array for
+indexing purposes, except the vector elements are immutable &mdash;
+assigning to an index of a vector raises an error.</li>
+
+</ul>
+<p>
+A ctype object can be indexed with a string key, too. The only
+pre-defined operation is reading scoped constants of
+<tt>struct</tt>/<tt>union</tt> types. All other accesses defer
+to the corresponding metamethods or index tables (if any).
+</p>
+<p>
+Note: since there's (deliberately) no address-of operator, a cdata
+object holding a value type is effectively immutable after
+initialization. The JIT compiler benefits from this fact when applying
+certain optimizations.
+</p>
+<p>
+As a consequence, the <em>elements</em> of complex numbers and
+vectors are immutable. But the elements of an aggregate holding these
+types <em>may</em> be modified of course. I.e. you cannot assign to
+<tt>foo.c.im</tt>, but you can assign a (newly created) complex number
+to <tt>foo.c</tt>.
+</p>
+<p>
+The JIT compiler implements strict aliasing rules: accesses to different
+types do <b>not</b> alias, except for differences in signedness (this
+applies even to <tt>char</tt> pointers, unlike C99). Type punning
+through unions is explicitly detected and allowed.
+</p>
+
+<h3 id="cdata_call">Calling a cdata object</h3>
+<ul>
+
+<li><b>Constructor</b>: a ctype object can be called and used as a
+<a href="ext_ffi_api.html#ffi_new">constructor</a>. This is equivalent
+to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod is
+defined. The <tt>__new</tt> metamethod is called with the ctype object
+plus any other arguments passed to the contructor. Note that you have to
+use <tt>ffi.new</tt> inside of it, since calling <tt>ct(...)</tt> would
+cause infinite recursion.</li>
+
+<li><b>C&nbsp;function call</b>: a cdata function or cdata function
+pointer can be called. The passed arguments are
+<a href="#convert_fromlua">converted to the C&nbsp;types</a> of the
+parameters given by the function declaration. Arguments passed to the
+variable argument part of vararg C&nbsp;function use
+<a href="#convert_vararg">special conversion rules</a>. This
+C&nbsp;function is called and the return value (if any) is
+<a href="#convert_tolua">converted to a Lua object</a>.<br>
+On Windows/x86 systems, <tt>__stdcall</tt> functions are automatically
+detected and a function declared as <tt>__cdecl</tt> (the default) is
+silently fixed up after the first call.</li>
+
+</ul>
+
+<h3 id="cdata_arith">Arithmetic on cdata objects</h3>
+<ul>
+
+<li><b>Pointer arithmetic</b>: a cdata pointer/array and a cdata
+number or a Lua number can be added or subtracted. The number must be
+on the right hand side for a subtraction. The result is a pointer of
+the same type with an address plus or minus the number value
+multiplied by the element size in bytes. An error is raised if the
+element size is undefined.</li>
+
+<li><b>Pointer difference</b>: two compatible cdata pointers/arrays
+can be subtracted. The result is the difference between their
+addresses, divided by the element size in bytes. An error is raised if
+the element size is undefined or zero.</li>
+
+<li><b>64&nbsp;bit integer arithmetic</b>: the standard arithmetic
+operators (<tt>+&nbsp;-&nbsp;*&nbsp;/&nbsp;%&nbsp;^</tt> and unary
+minus) can be applied to two cdata numbers, or a cdata number and a
+Lua number. If one of them is an <tt>uint64_t</tt>, the other side is
+converted to an <tt>uint64_t</tt> and an unsigned arithmetic operation
+is performed. Otherwise both sides are converted to an
+<tt>int64_t</tt> and a signed arithmetic operation is performed. The
+result is a boxed 64&nbsp;bit cdata object.<br>
+
+If one of the operands is an <tt>enum</tt> and the other operand is a
+string, the string is converted to the value of a matching <tt>enum</tt>
+constant before the above conversion.<br>
+
+These rules ensure that 64&nbsp;bit integers are "sticky". Any
+expression involving at least one 64&nbsp;bit integer operand results
+in another one. The undefined cases for the division, modulo and power
+operators return <tt>2LL&nbsp;^&nbsp;63</tt> or
+<tt>2ULL&nbsp;^&nbsp;63</tt>.<br>
+
+You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
+number (e.g. for regular floating-point calculations) with
+<tt>tonumber()</tt>. But note this may incur a precision loss.</li>
+
+</ul>
+
+<h3 id="cdata_comp">Comparisons of cdata objects</h3>
+<ul>
+
+<li><b>Pointer comparison</b>: two compatible cdata pointers/arrays
+can be compared. The result is the same as an unsigned comparison of
+their addresses. <tt>nil</tt> is treated like a <tt>NULL</tt> pointer,
+which is compatible with any other pointer type.</li>
+
+<li><b>64&nbsp;bit integer comparison</b>: two cdata numbers, or a
+cdata number and a Lua number can be compared with each other. If one
+of them is an <tt>uint64_t</tt>, the other side is converted to an
+<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise
+both sides are converted to an <tt>int64_t</tt> and a signed
+comparison is performed.<br>
+
+If one of the operands is an <tt>enum</tt> and the other operand is a
+string, the string is converted to the value of a matching <tt>enum</tt>
+constant before the above conversion.<br>
+
+<li><b>Comparisons for equality/inequality</b> never raise an error.
+Even incompatible pointers can be compared for equality by address. Any
+other incompatible comparison (also with non-cdata objects) treats the
+two sides as unequal.</li>
+
+</ul>
+
+<h3 id="cdata_key">cdata objects as table keys</h3>
+<p>
+Lua tables may be indexed by cdata objects, but this doesn't provide
+any useful semantics &mdash; <b>cdata objects are unsuitable as table
+keys!</b>
+</p>
+<p>
+A cdata object is treated like any other garbage-collected object and
+is hashed and compared by its address for table indexing. Since
+there's no interning for cdata value types, the same value may be
+boxed in different cdata objects with different addresses. Thus
+<tt>t[1LL+1LL]</tt> and <tt>t[2LL]</tt> usually <b>do not</b> point to
+the same hash slot and they certainly <b>do not</b> point to the same
+hash slot as <tt>t[2]</tt>.
+</p>
+<p>
+It would seriously drive up implementation complexity and slow down
+the common case, if one were to add extra handling for by-value
+hashing and comparisons to Lua tables. Given the ubiquity of their use
+inside the VM, this is not acceptable.
+</p>
+<p>
+There are three viable alternatives, if you really need to use cdata
+objects as keys:
+</p>
+<ul>
+
+<li>If you can get by with the precision of Lua numbers
+(52&nbsp;bits), then use <tt>tonumber()</tt> on a cdata number or
+combine multiple fields of a cdata aggregate to a Lua number. Then use
+the resulting Lua number as a key when indexing tables.<br>
+One obvious benefit: <tt>t[tonumber(2LL)]</tt> <b>does</b> point to
+the same slot as <tt>t[2]</tt>.</li>
+
+<li>Otherwise use either <tt>tostring()</tt> on 64&nbsp;bit integers
+or complex numbers or combine multiple fields of a cdata aggregate to
+a Lua string (e.g. with
+<a href="ext_ffi_api.html#ffi_string"><tt>ffi.string()</tt></a>). Then
+use the resulting Lua string as a key when indexing tables.</li>
+
+<li>Create your own specialized hash table implementation using the
+C&nbsp;types provided by the FFI library, just like you would in
+C&nbsp;code. Ultimately this may give much better performance than the
+other alternatives or what a generic by-value hash table could
+possibly provide.</li>
+
+</ul>
+
+<h2 id="param">Parameterized Types</h2>
+<p>
+To facilitate some abstractions, the two functions
+<a href="ext_ffi_api.html#ffi_typeof"><tt>ffi.typeof</tt></a> and
+<a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> support
+parameterized types in C&nbsp;declarations. Note: none of the other API
+functions taking a cdecl allow this.
+</p>
+<p>
+Any place you can write a <b><tt>typedef</tt> name</b>, an
+<b>identifier</b> or a <b>number</b> in a declaration, you can write
+<tt>$</tt> (the dollar sign) instead. These placeholders are replaced in
+order of appearance with the arguments following the cdecl string:
+</p>
+<pre class="code">
+-- Declare a struct with a parameterized field type and name:
+ffi.cdef([[
+typedef struct { $ $; } foo_t;
+]], type1, name1)
+
+-- Anonymous struct with dynamic names:
+local bar_t = ffi.typeof("struct { int $, $; }", name1, name2)
+-- Derived pointer type:
+local bar_ptr_t = ffi.typeof("$ *", bar_t)
+
+-- Parameterized dimensions work even where a VLA won't work:
+local matrix_t = ffi.typeof("uint8_t[$][$]", width, height)
+</pre>
+<p>
+Caveat: this is <em>not</em> simple text substitution! A passed ctype or
+cdata object is treated like the underlying type, a passed string is
+considered an identifier and a number is considered a number. You must
+not mix this up: e.g. passing <tt>"int"</tt> as a string doesn't work in
+place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead.
+</p>
+<p>
+The main use for parameterized types are libraries implementing abstract
+data types
+(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">&raquo;</span>&nbsp;example</a>),
+similar to what can be achieved with C++ template metaprogramming.
+Another use case are derived types of anonymous structs, which avoids
+pollution of the global struct namespace.
+</p>
+<p>
+Please note that parameterized types are a nice tool and indispensable
+for certain use cases. But you'll want to use them sparingly in regular
+code, e.g. when all types are actually fixed.
+</p>
+
+<h2 id="gc">Garbage Collection of cdata Objects</h2>
+<p>
+All explicitly (<tt>ffi.new()</tt>, <tt>ffi.cast()</tt> etc.) or
+implicitly (accessors) created cdata objects are garbage collected.
+You need to ensure to retain valid references to cdata objects
+somewhere on a Lua stack, an upvalue or in a Lua table while they are
+still in use. Once the last reference to a cdata object is gone, the
+garbage collector will automatically free the memory used by it (at
+the end of the next GC cycle).
+</p>
+<p>
+Please note that pointers themselves are cdata objects, however they
+are <b>not</b> followed by the garbage collector. So e.g. if you
+assign a cdata array to a pointer, you must keep the cdata object
+holding the array alive as long as the pointer is still in use:
+</p>
+<pre class="code">
+ffi.cdef[[
+typedef struct { int *a; } foo_t;
+]]
+
+local s = ffi.new("foo_t", ffi.new("int[10]")) -- <span style="color:#c00000;">WRONG!</span>
+
+local a = ffi.new("int[10]") -- <span style="color:#00a000;">OK</span>
+local s = ffi.new("foo_t", a)
+-- Now do something with 's', but keep 'a' alive until you're done.
+</pre>
+<p>
+Similar rules apply for Lua strings which are implicitly converted to
+<tt>"const&nbsp;char&nbsp;*"</tt>: the string object itself must be
+referenced somewhere or it'll be garbage collected eventually. The
+pointer will then point to stale data, which may have already been
+overwritten. Note that <em>string literals</em> are automatically kept
+alive as long as the function containing it (actually its prototype)
+is not garbage collected.
+</p>
+<p>
+Objects which are passed as an argument to an external C&nbsp;function
+are kept alive until the call returns. So it's generally safe to
+create temporary cdata objects in argument lists. This is a common
+idiom for <a href="#convert_vararg">passing specific C&nbsp;types to
+vararg functions</a>.
+</p>
+<p>
+Memory areas returned by C functions (e.g. from <tt>malloc()</tt>)
+must be manually managed, of course (or use
+<a href="ext_ffi_api.html#ffi_gc"><tt>ffi.gc()</tt></a>). Pointers to
+cdata objects are indistinguishable from pointers returned by C
+functions (which is one of the reasons why the GC cannot follow them).
+</p>
+
+<h2 id="callback">Callbacks</h2>
+<p>
+The LuaJIT FFI automatically generates special callback functions
+whenever a Lua function is converted to a C&nbsp;function pointer. This
+associates the generated callback function pointer with the C&nbsp;type
+of the function pointer and the Lua function object (closure).
+</p>
+<p>
+This can happen implicitly due to the usual conversions, e.g. when
+passing a Lua function to a function pointer argument. Or you can use
+<tt>ffi.cast()</tt> to explicitly cast a Lua function to a
+C&nbsp;function pointer.
+</p>
+<p>
+Currently only certain C&nbsp;function types can be used as callback
+functions. Neither C&nbsp;vararg functions nor functions with
+pass-by-value aggregate argument or result types are supported. There
+are no restrictions for the kind of Lua functions that can be called
+from the callback &mdash; no checks for the proper number of arguments
+are made. The return value of the Lua function will be converted to the
+result type and an error will be thrown for invalid conversions.
+</p>
+<p>
+It's allowed to throw errors across a callback invocation, but it's not
+advisable in general. Do this only if you know the C&nbsp;function, that
+called the callback, copes with the forced stack unwinding and doesn't
+leak resources.
+</p>
+<p>
+One thing that's not allowed, is to let an FFI call into a C&nbsp;function
+get JIT-compiled, which in turn calls a callback, calling into Lua again.
+Usually this attempt is caught by the interpreter first and the
+C&nbsp;function is blacklisted for compilation.
+</p>
+<p>
+However, this heuristic may fail under specific circumstances: e.g. a
+message polling function might not run Lua callbacks right away and the call
+gets JIT-compiled. If it later happens to call back into Lua (e.g. a rarely
+invoked error callback), you'll get a VM PANIC with the message
+<tt>"bad callback"</tt>. Then you'll need to manually turn off
+JIT-compilation with
+<a href="ext_jit.html#jit_onoff_func"><tt>jit.off()</tt></a> for the
+surrounding Lua function that invokes such a message polling function (or
+similar).
+</p>
+
+<h3 id="callback_resources">Callback resource handling</h3>
+<p>
+Callbacks take up resources &mdash; you can only have a limited number
+of them at the same time (500&nbsp;-&nbsp;1000, depending on the
+architecture). The associated Lua functions are anchored to prevent
+garbage collection, too.
+</p>
+<p>
+<b>Callbacks due to implicit conversions are permanent!</b> There is no
+way to guess their lifetime, since the C&nbsp;side might store the
+function pointer for later use (typical for GUI toolkits). The associated
+resources cannot be reclaimed until termination:
+</p>
+<pre class="code">
+ffi.cdef[[
+typedef int (__stdcall *WNDENUMPROC)(void *hwnd, intptr_t l);
+int EnumWindows(WNDENUMPROC func, intptr_t l);
+]]
+
+-- Implicit conversion to a callback via function pointer argument.
+local count = 0
+ffi.C.EnumWindows(function(hwnd, l)
+  count = count + 1
+  return true
+end, 0)
+-- The callback is permanent and its resources cannot be reclaimed!
+-- Ok, so this may not be a problem, if you do this only once.
+</pre>
+<p>
+Note: this example shows that you <em>must</em> properly declare
+<tt>__stdcall</tt> callbacks on Windows/x86 systems. The calling
+convention cannot be automatically detected, unlike for
+<tt>__stdcall</tt> calls <em>to</em> Windows functions.
+</p>
+<p>
+For some use cases it's necessary to free up the resources or to
+dynamically redirect callbacks. Use an explicit cast to a
+C&nbsp;function pointer and keep the resulting cdata object. Then use
+the <a href="ext_ffi_api.html#callback_free"><tt>cb:free()</tt></a>
+or <a href="ext_ffi_api.html#callback_set"><tt>cb:set()</tt></a> methods
+on the cdata object:
+</p>
+<pre class="code">
+-- Explicitly convert to a callback via cast.
+local count = 0
+local cb = ffi.cast("WNDENUMPROC", function(hwnd, l)
+  count = count + 1
+  return true
+end)
+
+-- Pass it to a C function.
+ffi.C.EnumWindows(cb, 0)
+-- EnumWindows doesn't need the callback after it returns, so free it.
+
+cb:free()
+-- The callback function pointer is no longer valid and its resources
+-- will be reclaimed. The created Lua closure will be garbage collected.
+</pre>
+
+<h3 id="callback_performance">Callback performance</h3>
+<p>
+<b>Callbacks are slow!</b> First, the C&nbsp;to Lua transition itself
+has an unavoidable cost, similar to a <tt>lua_call()</tt> or
+<tt>lua_pcall()</tt>. Argument and result marshalling add to that cost.
+And finally, neither the C&nbsp;compiler nor LuaJIT can inline or
+optimize across the language barrier and hoist repeated computations out
+of a callback function.
+</p>
+<p>
+Do not use callbacks for performance-sensitive work: e.g. consider a
+numerical integration routine which takes a user-defined function to
+integrate over. It's a bad idea to call a user-defined Lua function from
+C&nbsp;code millions of times. The callback overhead will be absolutely
+detrimental for performance.
+</p>
+<p>
+It's considerably faster to write the numerical integration routine
+itself in Lua &mdash; the JIT compiler will be able to inline the
+user-defined function and optimize it together with its calling context,
+with very competitive performance.
+</p>
+<p>
+As a general guideline: <b>use callbacks only when you must</b>, because
+of existing C&nbsp;APIs. E.g. callback performance is irrelevant for a
+GUI application, which waits for user input most of the time, anyway.
+</p>
+<p>
+For new designs <b>avoid push-style APIs</b>: a C&nbsp;function repeatedly
+calling a callback for each result. Instead <b>use pull-style APIs</b>:
+call a C&nbsp;function repeatedly to get a new result. Calls from Lua
+to C via the FFI are much faster than the other way round. Most well-designed
+libraries already use pull-style APIs (read/write, get/put).
+</p>
+
+<h2 id="clib">C Library Namespaces</h2>
+<p>
+A C&nbsp;library namespace is a special kind of object which allows
+access to the symbols contained in shared libraries or the default
+symbol namespace. The default
+<a href="ext_ffi_api.html#ffi_C"><tt>ffi.C</tt></a> namespace is
+automatically created when the FFI library is loaded. C&nbsp;library
+namespaces for specific shared libraries may be created with the
+<a href="ext_ffi_api.html#ffi_load"><tt>ffi.load()</tt></a> API
+function.
+</p>
+<p>
+Indexing a C&nbsp;library namespace object with a symbol name (a Lua
+string) automatically binds it to the library. First the symbol type
+is resolved &mdash; it must have been declared with
+<a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a>. Then the
+symbol address is resolved by searching for the symbol name in the
+associated shared libraries or the default symbol namespace. Finally,
+the resulting binding between the symbol name, the symbol type and its
+address is cached. Missing symbol declarations or nonexistent symbol
+names cause an error.
+</p>
+<p>
+This is what happens on a <b>read access</b> for the different kinds of
+symbols:
+</p>
+<ul>
+
+<li>External functions: a cdata object with the type of the function
+and its address is returned.</li>
+
+<li>External variables: the symbol address is dereferenced and the
+loaded value is <a href="#convert_tolua">converted to a Lua object</a>
+and returned.</li>
+
+<li>Constant values (<tt>static&nbsp;const</tt> or <tt>enum</tt>
+constants): the constant is <a href="#convert_tolua">converted to a
+Lua object</a> and returned.</li>
+
+</ul>
+<p>
+This is what happens on a <b>write access</b>:
+</p>
+<ul>
+
+<li>External variables: the value to be written is
+<a href="#convert_fromlua">converted to the C&nbsp;type</a> of the
+variable and then stored at the symbol address.</li>
+
+<li>Writing to constant variables or to any other symbol type causes
+an error, like any other attempted write to a constant location.</li>
+
+</ul>
+<p>
+C&nbsp;library namespaces themselves are garbage collected objects. If
+the last reference to the namespace object is gone, the garbage
+collector will eventually release the shared library reference and
+remove all memory associated with the namespace. Since this may
+trigger the removal of the shared library from the memory of the
+running process, it's generally <em>not safe</em> to use function
+cdata objects obtained from a library if the namespace object may be
+unreferenced.
+</p>
+<p>
+Performance notice: the JIT compiler specializes to the identity of
+namespace objects and to the strings used to index it. This
+effectively turns function cdata objects into constants. It's not
+useful and actually counter-productive to explicitly cache these
+function objects, e.g. <tt>local strlen = ffi.C.strlen</tt>. OTOH it
+<em>is</em> useful to cache the namespace itself, e.g. <tt>local C =
+ffi.C</tt>.
+</p>
+
+<h2 id="policy">No Hand-holding!</h2>
+<p>
+The FFI library has been designed as <b>a low-level library</b>. The
+goal is to interface with C&nbsp;code and C&nbsp;data types with a
+minimum of overhead. This means <b>you can do anything you can do
+from&nbsp;C</b>: access all memory, overwrite anything in memory, call
+machine code at any memory address and so on.
+</p>
+<p>
+The FFI library provides <b>no memory safety</b>, unlike regular Lua
+code. It will happily allow you to dereference a <tt>NULL</tt>
+pointer, to access arrays out of bounds or to misdeclare
+C&nbsp;functions. If you make a mistake, your application might crash,
+just like equivalent C&nbsp;code would.
+</p>
+<p>
+This behavior is inevitable, since the goal is to provide full
+interoperability with C&nbsp;code. Adding extra safety measures, like
+bounds checks, would be futile. There's no way to detect
+misdeclarations of C&nbsp;functions, since shared libraries only
+provide symbol names, but no type information. Likewise there's no way
+to infer the valid range of indexes for a returned pointer.
+</p>
+<p>
+Again: the FFI library is a low-level library. This implies it needs
+to be used with care, but it's flexibility and performance often
+outweigh this concern. If you're a C or C++ developer, it'll be easy
+to apply your existing knowledge. OTOH writing code for the FFI
+library is not for the faint of heart and probably shouldn't be the
+first exercise for someone with little experience in Lua, C or C++.
+</p>
+<p>
+As a corollary of the above, the FFI library is <b>not safe for use by
+untrusted Lua code</b>. If you're sandboxing untrusted Lua code, you
+definitely don't want to give this code access to the FFI library or
+to <em>any</em> cdata object (except 64&nbsp;bit integers or complex
+numbers). Any properly engineered Lua sandbox needs to provide safety
+wrappers for many of the standard Lua library functions &mdash;
+similar wrappers need to be written for high-level operations on FFI
+data types, too.
+</p>
+
+<h2 id="status">Current Status</h2>
+<p>
+The initial release of the FFI library has some limitations and is
+missing some features. Most of these will be fixed in future releases.
+</p>
+<p>
+<a href="#clang">C language support</a> is
+currently incomplete:
+</p>
+<ul>
+<li>C&nbsp;declarations are not passed through a C&nbsp;pre-processor,
+yet.</li>
+<li>The C&nbsp;parser is able to evaluate most constant expressions
+commonly found in C&nbsp;header files. However it doesn't handle the
+full range of C&nbsp;expression semantics and may fail for some
+obscure constructs.</li>
+<li><tt>static const</tt> declarations only work for integer types
+up to 32&nbsp;bits. Neither declaring string constants nor
+floating-point constants is supported.</li>
+<li>Packed <tt>struct</tt> bitfields that cross container boundaries
+are not implemented.</li>
+<li>Native vector types may be defined with the GCC <tt>mode</tt> or
+<tt>vector_size</tt> attribute. But no operations other than loading,
+storing and initializing them are supported, yet.</li>
+<li>The <tt>volatile</tt> type qualifier is currently ignored by
+compiled code.</li>
+<li><a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> silently
+ignores most re-declarations. Note: avoid re-declarations which do not
+conform to C99. The implementation will eventually be changed to
+perform strict checks.</li>
+</ul>
+<p>
+The JIT compiler already handles a large subset of all FFI operations.
+It automatically falls back to the interpreter for unimplemented
+operations (you can check for this with the
+<a href="running.html#opt_j"><tt>-jv</tt></a> command line option).
+The following operations are currently not compiled and may exhibit
+suboptimal performance, especially when used in inner loops:
+</p>
+<ul>
+<li>Bitfield accesses and initializations.</li>
+<li>Vector operations.</li>
+<li>Table initializers.</li>
+<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
+<li>Allocations of variable-length arrays or structs.</li>
+<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an
+alignment &gt; 8&nbsp;bytes.</li>
+<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
+<li>Pointer differences for element sizes that are not a power of
+two.</li>
+<li>Calls to C&nbsp;functions with aggregates passed or returned by
+value.</li>
+<li>Calls to ctype metamethods which are not plain functions.</li>
+<li>ctype <tt>__newindex</tt> tables and non-string lookups in ctype
+<tt>__index</tt> tables.</li>
+<li><tt>tostring()</tt> for cdata types.</li>
+<li>Calls to <tt>ffi.cdef()</tt>, <tt>ffi.load()</tt> and
+<tt>ffi.metatype()</tt>.</li>
+</ul>
+<p>
+Other missing features:
+</p>
+<ul>
+<li>Bit operations for 64&nbsp;bit types.</li>
+<li>Arithmetic for <tt>complex</tt> numbers.</li>
+<li>Passing structs by value to vararg C&nbsp;functions.</li>
+<li><a href="extensions.html#exceptions">C++ exception interoperability</a>
+does not extend to C&nbsp;functions called via the FFI, if the call is
+compiled.</li>
+</ul>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 601 - 0
luajit.mod/luajit/doc/ext_ffi_tutorial.html

@@ -0,0 +1,601 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>FFI Tutorial</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.idiomtable { font-size: 90%; line-height: 1.2; }
+table.idiomtable tt { font-size: 100%; }
+table.idiomtable td { vertical-align: top; }
+tr.idiomhead td { font-weight: bold; }
+td.idiomlua b { font-weight: normal; color: #2142bf; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>FFI Tutorial</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a class="current" href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+This page is intended to give you an overview of the features of the FFI
+library by presenting a few use cases and guidelines.
+</p>
+<p>
+This page makes no attempt to explain all of the FFI library, though.
+You'll want to have a look at the <a href="ext_ffi_api.html">ffi.* API
+function reference</a> and the <a href="ext_ffi_semantics.html">FFI
+semantics</a> to learn more.
+</p>
+
+<h2 id="load">Loading the FFI Library</h2>
+<p>
+The FFI library is built into LuaJIT by default, but it's not loaded
+and initialized by default. The suggested way to use the FFI library
+is to add the following to the start of every Lua file that needs one
+of its functions:
+</p>
+<pre class="code">
+local ffi = require("ffi")
+</pre>
+<p>
+Please note this doesn't define an <tt>ffi</tt> variable in the table
+of globals &mdash; you really need to use the local variable. The
+<tt>require</tt> function ensures the library is only loaded once.
+</p>
+<p style="font-size: 8pt;">
+Note: If you want to experiment with the FFI from the interactive prompt
+of the command line executable, omit the <tt>local</tt>, as it doesn't
+preserve local variables across lines.
+</p>
+
+<h2 id="sleep">Accessing Standard System Functions</h2>
+<p>
+The following code explains how to access standard system functions.
+We slowly print two lines of dots by sleeping for 10&nbsp;milliseconds
+after each dot:
+</p>
+<pre class="code mark">
+<span class="codemark">&nbsp;
+&#9312;
+
+
+
+
+
+&#9313;
+&#9314;
+&#9315;
+
+
+
+&#9316;
+
+
+
+
+
+&#9317;</span>local ffi = require("ffi")
+ffi.cdef[[
+<span style="color:#00a000;">void Sleep(int ms);
+int poll(struct pollfd *fds, unsigned long nfds, int timeout);</span>
+]]
+
+local sleep
+if ffi.os == "Windows" then
+  function sleep(s)
+    ffi.C.Sleep(s*1000)
+  end
+else
+  function sleep(s)
+    ffi.C.poll(nil, 0, s*1000)
+  end
+end
+
+for i=1,160 do
+  io.write("."); io.flush()
+  sleep(0.01)
+end
+io.write("\n")
+</pre>
+<p>
+Here's the step-by-step explanation:
+</p>
+<p>
+<span class="mark">&#9312;</span> This defines the
+C&nbsp;library functions we're going to use. The part inside the
+double-brackets (in green) is just standard C&nbsp;syntax. You can
+usually get this info from the C&nbsp;header files or the
+documentation provided by each C&nbsp;library or C&nbsp;compiler.
+</p>
+<p>
+<span class="mark">&#9313;</span> The difficulty we're
+facing here, is that there are different standards to choose from.
+Windows has a simple <tt>Sleep()</tt> function. On other systems there
+are a variety of functions available to achieve sub-second sleeps, but
+with no clear consensus. Thankfully <tt>poll()</tt> can be used for
+this task, too, and it's present on most non-Windows systems. The
+check for <tt>ffi.os</tt> makes sure we use the Windows-specific
+function only on Windows systems.
+</p>
+<p>
+<span class="mark">&#9314;</span> Here we're wrapping the
+call to the C&nbsp;function in a Lua function. This isn't strictly
+necessary, but it's helpful to deal with system-specific issues only
+in one part of the code. The way we're wrapping it ensures the check
+for the OS is only done during initialization and not for every call.
+</p>
+<p>
+<span class="mark">&#9315;</span> A more subtle point is
+that we defined our <tt>sleep()</tt> function (for the sake of this
+example) as taking the number of seconds, but accepting fractional
+seconds. Multiplying this by 1000 gets us milliseconds, but that still
+leaves it a Lua number, which is a floating-point value. Alas, the
+<tt>Sleep()</tt> function only accepts an integer value. Luckily for
+us, the FFI library automatically performs the conversion when calling
+the function (truncating the FP value towards zero, like in C).
+</p>
+<p style="font-size: 8pt;">
+Some readers will notice that <tt>Sleep()</tt> is part of
+<tt>KERNEL32.DLL</tt> and is also a <tt>stdcall</tt> function. So how
+can this possibly work? The FFI library provides the <tt>ffi.C</tt>
+default C&nbsp;library namespace, which allows calling functions from
+the default set of libraries, like a C&nbsp;compiler would. Also, the
+FFI library automatically detects <tt>stdcall</tt> functions, so you
+don't need to declare them as such.
+</p>
+<p>
+<span class="mark">&#9316;</span> The <tt>poll()</tt>
+function takes a couple more arguments we're not going to use. You can
+simply use <tt>nil</tt> to pass a <tt>NULL</tt> pointer and <tt>0</tt>
+for the <tt>nfds</tt> parameter. Please note that the
+number&nbsp;<tt>0</tt> <em>does not convert to a pointer value</em>,
+unlike in C++. You really have to pass pointers to pointer arguments
+and numbers to number arguments.
+</p>
+<p style="font-size: 8pt;">
+The page on <a href="ext_ffi_semantics.html">FFI semantics</a> has all
+of the gory details about
+<a href="ext_ffi_semantics.html#convert">conversions between Lua
+objects and C&nbsp;types</a>. For the most part you don't have to deal
+with this, as it's performed automatically and it's carefully designed
+to bridge the semantic differences between Lua and C.
+</p>
+<p>
+<span class="mark">&#9317;</span> Now that we have defined
+our own <tt>sleep()</tt> function, we can just call it from plain Lua
+code. That wasn't so bad, huh? Turning these boring animated dots into
+a fascinating best-selling game is left as an exercise for the reader.
+:-)
+</p>
+
+<h2 id="zlib">Accessing the zlib Compression Library</h2>
+<p>
+The following code shows how to access the <a
+href="http://zlib.net/">zlib</a> compression library from Lua code.
+We'll define two convenience wrapper functions that take a string and
+compress or uncompress it to another string:
+</p>
+<pre class="code mark">
+<span class="codemark">&nbsp;
+&#9312;
+
+
+
+
+
+
+&#9313;
+
+
+&#9314;
+
+&#9315;
+
+
+&#9316;
+
+
+&#9317;
+
+
+
+
+
+
+
+&#9318;</span>local ffi = require("ffi")
+ffi.cdef[[
+<span style="color:#00a000;">unsigned long compressBound(unsigned long sourceLen);
+int compress2(uint8_t *dest, unsigned long *destLen,
+	      const uint8_t *source, unsigned long sourceLen, int level);
+int uncompress(uint8_t *dest, unsigned long *destLen,
+	       const uint8_t *source, unsigned long sourceLen);</span>
+]]
+local zlib = ffi.load(ffi.os == "Windows" and "zlib1" or "z")
+
+local function compress(txt)
+  local n = zlib.compressBound(#txt)
+  local buf = ffi.new("uint8_t[?]", n)
+  local buflen = ffi.new("unsigned long[1]", n)
+  local res = zlib.compress2(buf, buflen, txt, #txt, 9)
+  assert(res == 0)
+  return ffi.string(buf, buflen[0])
+end
+
+local function uncompress(comp, n)
+  local buf = ffi.new("uint8_t[?]", n)
+  local buflen = ffi.new("unsigned long[1]", n)
+  local res = zlib.uncompress(buf, buflen, comp, #comp)
+  assert(res == 0)
+  return ffi.string(buf, buflen[0])
+end
+
+-- Simple test code.
+local txt = string.rep("abcd", 1000)
+print("Uncompressed size: ", #txt)
+local c = compress(txt)
+print("Compressed size: ", #c)
+local txt2 = uncompress(c, #txt)
+assert(txt2 == txt)
+</pre>
+<p>
+Here's the step-by-step explanation:
+</p>
+<p>
+<span class="mark">&#9312;</span> This defines some of the
+C&nbsp;functions provided by zlib. For the sake of this example, some
+type indirections have been reduced and it uses the pre-defined
+fixed-size integer types, while still adhering to the zlib API/ABI.
+</p>
+<p>
+<span class="mark">&#9313;</span> This loads the zlib shared
+library. On POSIX systems it's named <tt>libz.so</tt> and usually
+comes pre-installed. Since <tt>ffi.load()</tt> automatically adds any
+missing standard prefixes/suffixes, we can simply load the
+<tt>"z"</tt> library. On Windows it's named <tt>zlib1.dll</tt> and
+you'll have to download it first from the
+<a href="http://zlib.net/"><span class="ext">&raquo;</span>&nbsp;zlib site</a>. The check for
+<tt>ffi.os</tt> makes sure we pass the right name to
+<tt>ffi.load()</tt>.
+</p>
+<p>
+<span class="mark">&#9314;</span> First, the maximum size of
+the compression buffer is obtained by calling the
+<tt>zlib.compressBound</tt> function with the length of the
+uncompressed string. The next line allocates a byte buffer of this
+size. The <tt>[?]</tt> in the type specification indicates a
+variable-length array (VLA). The actual number of elements of this
+array is given as the 2nd argument to <tt>ffi.new()</tt>.
+</p>
+<p>
+<span class="mark">&#9315;</span> This may look strange at
+first, but have a look at the declaration of the <tt>compress2</tt>
+function from zlib: the destination length is defined as a pointer!
+This is because you pass in the maximum buffer size and get back the
+actual length that was used.
+</p>
+<p>
+In C you'd pass in the address of a local variable
+(<tt>&amp;buflen</tt>). But since there's no address-of operator in
+Lua, we'll just pass in a one-element array. Conveniently it can be
+initialized with the maximum buffer size in one step. Calling the
+actual <tt>zlib.compress2</tt> function is then straightforward.
+</p>
+<p>
+<span class="mark">&#9316;</span> We want to return the
+compressed data as a Lua string, so we'll use <tt>ffi.string()</tt>.
+It needs a pointer to the start of the data and the actual length. The
+length has been returned in the <tt>buflen</tt> array, so we'll just
+get it from there.
+</p>
+<p style="font-size: 8pt;">
+Note that since the function returns now, the <tt>buf</tt> and
+<tt>buflen</tt> variables will eventually be garbage collected. This
+is fine, because <tt>ffi.string()</tt> has copied the contents to a
+newly created (interned) Lua string. If you plan to call this function
+lots of times, consider reusing the buffers and/or handing back the
+results in buffers instead of strings. This will reduce the overhead
+for garbage collection and string interning.
+</p>
+<p>
+<span class="mark">&#9317;</span> The <tt>uncompress</tt>
+functions does the exact opposite of the <tt>compress</tt> function.
+The compressed data doesn't include the size of the original string,
+so this needs to be passed in. Otherwise no surprises here.
+</p>
+<p>
+<span class="mark">&#9318;</span> The code, that makes use
+of the functions we just defined, is just plain Lua code. It doesn't
+need to know anything about the LuaJIT FFI &mdash; the convenience
+wrapper functions completely hide it.
+</p>
+<p>
+One major advantage of the LuaJIT FFI is that you are now able to
+write those wrappers <em>in Lua</em>. And at a fraction of the time it
+would cost you to create an extra C&nbsp;module using the Lua/C API.
+Many of the simpler C&nbsp;functions can probably be used directly
+from your Lua code, without any wrappers.
+</p>
+<p style="font-size: 8pt;">
+Side note: the zlib API uses the <tt>long</tt> type for passing
+lengths and sizes around. But all those zlib functions actually only
+deal with 32&nbsp;bit values. This is an unfortunate choice for a
+public API, but may be explained by zlib's history &mdash; we'll just
+have to deal with it.
+</p>
+<p style="font-size: 8pt;">
+First, you should know that a <tt>long</tt> is a 64&nbsp;bit type e.g.
+on POSIX/x64 systems, but a 32&nbsp;bit type on Windows/x64 and on
+32&nbsp;bit systems. Thus a <tt>long</tt> result can be either a plain
+Lua number or a boxed 64&nbsp;bit integer cdata object, depending on
+the target system.
+</p>
+<p style="font-size: 8pt;">
+Ok, so the <tt>ffi.*</tt> functions generally accept cdata objects
+wherever you'd want to use a number. That's why we get a away with
+passing <tt>n</tt> to <tt>ffi.string()</tt> above. But other Lua
+library functions or modules don't know how to deal with this. So for
+maximum portability one needs to use <tt>tonumber()</tt> on returned
+<tt>long</tt> results before passing them on. Otherwise the
+application might work on some systems, but would fail in a POSIX/x64
+environment.
+</p>
+
+<h2 id="metatype">Defining Metamethods for a C&nbsp;Type</h2>
+<p>
+The following code explains how to define metamethods for a C type.
+We define a simple point type and add some operations to it:
+</p>
+<pre class="code mark">
+<span class="codemark">&nbsp;
+&#9312;
+
+
+
+&#9313;
+
+&#9314;
+
+&#9315;
+
+
+
+&#9316;
+
+&#9317;</span>local ffi = require("ffi")
+ffi.cdef[[
+<span style="color:#00a000;">typedef struct { double x, y; } point_t;</span>
+]]
+
+local point
+local mt = {
+  __add = function(a, b) return point(a.x+b.x, a.y+b.y) end,
+  __len = function(a) return math.sqrt(a.x*a.x + a.y*a.y) end,
+  __index = {
+    area = function(a) return a.x*a.x + a.y*a.y end,
+  },
+}
+point = ffi.metatype("point_t", mt)
+
+local a = point(3, 4)
+print(a.x, a.y)  --> 3  4
+print(#a)        --> 5
+print(a:area())  --> 25
+local b = a + point(0.5, 8)
+print(#b)        --> 12.5
+</pre>
+<p>
+Here's the step-by-step explanation:
+</p>
+<p>
+<span class="mark">&#9312;</span> This defines the C&nbsp;type for a
+two-dimensional point object.
+</p>
+<p>
+<span class="mark">&#9313;</span> We have to declare the variable
+holding the point constructor first, because it's used inside of a
+metamethod.
+</p>
+<p>
+<span class="mark">&#9314;</span> Let's define an <tt>__add</tt>
+metamethod which adds the coordinates of two points and creates a new
+point object. For simplicity, this function assumes that both arguments
+are points. But it could be any mix of objects, if at least one operand
+is of the required type (e.g. adding a point plus a number or vice
+versa). Our <tt>__len</tt> metamethod returns the distance of a point to
+the origin.
+</p>
+<p>
+<span class="mark">&#9315;</span> If we run out of operators, we can
+define named methods, too. Here the <tt>__index</tt> table defines an
+<tt>area</tt> function. For custom indexing needs, one might want to
+define <tt>__index</tt> and <tt>__newindex</tt> <em>functions</em> instead.
+</p>
+<p>
+<span class="mark">&#9316;</span> This associates the metamethods with
+our C&nbsp;type. This only needs to be done once. For convenience, a
+constructor is returned by
+<a href="ext_ffi_api.html#ffi_metatype"><tt>ffi.metatype()</tt></a>.
+We're not required to use it, though. The original C&nbsp;type can still
+be used e.g. to create an array of points. The metamethods automatically
+apply to any and all uses of this type.
+</p>
+<p>
+Please note that the association with a metatable is permanent and
+<b>the metatable must not be modified afterwards!</b> Ditto for the
+<tt>__index</tt> table.
+</p>
+<p>
+<span class="mark">&#9317;</span> Here are some simple usage examples
+for the point type and their expected results. The pre-defined
+operations (such as <tt>a.x</tt>) can be freely mixed with the newly
+defined metamethods. Note that <tt>area</tt> is a method and must be
+called with the Lua syntax for methods: <tt>a:area()</tt>, not
+<tt>a.area()</tt>.
+</p>
+<p>
+The C&nbsp;type metamethod mechanism is most useful when used in
+conjunction with C&nbsp;libraries that are written in an object-oriented
+style. Creators return a pointer to a new instance and methods take an
+instance pointer as the first argument. Sometimes you can just point
+<tt>__index</tt> to the library namespace and <tt>__gc</tt> to the
+destructor and you're done. But often enough you'll want to add
+convenience wrappers, e.g. to return actual Lua strings or when
+returning multiple values.
+</p>
+<p>
+Some C libraries only declare instance pointers as an opaque
+<tt>void&nbsp;*</tt> type. In this case you can use a fake type for all
+declarations, e.g. a pointer to a named (incomplete) struct will do:
+<tt>typedef struct foo_type *foo_handle</tt>. The C&nbsp;side doesn't
+know what you declare with the LuaJIT FFI, but as long as the underlying
+types are compatible, everything still works.
+</p>
+
+<h2 id="idioms">Translating C&nbsp;Idioms</h2>
+<p>
+Here's a list of common C&nbsp;idioms and their translation to the
+LuaJIT FFI:
+</p>
+<table class="idiomtable">
+<tr class="idiomhead">
+<td class="idiomdesc">Idiom</td>
+<td class="idiomc">C&nbsp;code</td>
+<td class="idiomlua">Lua code</td>
+</tr>
+<tr class="odd separate">
+<td class="idiomdesc">Pointer dereference<br><tt>int *p;</tt></td><td class="idiomc"><tt>x = *p;<br>*p = y;</tt></td><td class="idiomlua"><tt>x = <b>p[0]</b><br><b>p[0]</b> = y</tt></td></tr>
+<tr class="even">
+<td class="idiomdesc">Pointer indexing<br><tt>int i, *p;</tt></td><td class="idiomc"><tt>x = p[i];<br>p[i+1] = y;</tt></td><td class="idiomlua"><tt>x = p[i]<br>p[i+1] = y</tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc">Array indexing<br><tt>int i, a[];</tt></td><td class="idiomc"><tt>x = a[i];<br>a[i+1] = y;</tt></td><td class="idiomlua"><tt>x = a[i]<br>a[i+1] = y</tt></td></tr>
+<tr class="even separate">
+<td class="idiomdesc"><tt>struct</tt>/<tt>union</tt> dereference<br><tt>struct foo s;</tt></td><td class="idiomc"><tt>x = s.field;<br>s.field = y;</tt></td><td class="idiomlua"><tt>x = s.field<br>s.field = y</tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc"><tt>struct</tt>/<tt>union</tt> pointer deref.<br><tt>struct foo *sp;</tt></td><td class="idiomc"><tt>x = sp->field;<br>sp->field = y;</tt></td><td class="idiomlua"><tt>x = <b>s.field</b><br><b>s.field</b> = y</tt></td></tr>
+<tr class="even separate">
+<td class="idiomdesc">Pointer arithmetic<br><tt>int i, *p;</tt></td><td class="idiomc"><tt>x = p + i;<br>y = p - i;</tt></td><td class="idiomlua"><tt>x = p + i<br>y = p - i</tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc">Pointer difference<br><tt>int *p1, *p2;</tt></td><td class="idiomc"><tt>x = p1 - p2;</tt></td><td class="idiomlua"><tt>x = p1 - p2</tt></td></tr>
+<tr class="even">
+<td class="idiomdesc">Array element pointer<br><tt>int i, a[];</tt></td><td class="idiomc"><tt>x = &amp;a[i];</tt></td><td class="idiomlua"><tt>x = <b>a+i</b></tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc">Cast pointer to address<br><tt>int *p;</tt></td><td class="idiomc"><tt>x = (intptr_t)p;</tt></td><td class="idiomlua"><tt>x = <b>tonumber(<br>&nbsp;ffi.cast("intptr_t",<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;p))</b></tt></td></tr>
+<tr class="even separate">
+<td class="idiomdesc">Functions with outargs<br><tt>void foo(int *inoutlen);</tt></td><td class="idiomc"><tt>int len = x;<br>foo(&amp;len);<br>y = len;</tt></td><td class="idiomlua"><tt><b>local len =<br>&nbsp;&nbsp;ffi.new("int[1]", x)<br>foo(len)<br>y = len[0]</b></tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc"><a href="ext_ffi_semantics.html#convert_vararg">Vararg conversions</a><br><tt>int printf(char *fmt, ...);</tt></td><td class="idiomc"><tt>printf("%g", 1.0);<br>printf("%d", 1);<br>&nbsp;</tt></td><td class="idiomlua"><tt>printf("%g", 1);<br>printf("%d",<br>&nbsp;&nbsp;<b>ffi.new("int", 1)</b>)</tt></td></tr>
+</table>
+
+<h2 id="cache">To Cache or Not to Cache</h2>
+<p>
+It's a common Lua idiom to cache library functions in local variables
+or upvalues, e.g.:
+</p>
+<pre class="code">
+local byte, char = string.byte, string.char
+local function foo(x)
+  return char(byte(x)+1)
+end
+</pre>
+<p>
+This replaces several hash-table lookups with a (faster) direct use of
+a local or an upvalue. This is less important with LuaJIT, since the
+JIT compiler optimizes hash-table lookups a lot and is even able to
+hoist most of them out of the inner loops. It can't eliminate
+<em>all</em> of them, though, and it saves some typing for often-used
+functions. So there's still a place for this, even with LuaJIT.
+</p>
+<p>
+The situation is a bit different with C&nbsp;function calls via the
+FFI library. The JIT compiler has special logic to eliminate <em>all
+of the lookup overhead</em> for functions resolved from a
+<a href="ext_ffi_semantics.html#clib">C&nbsp;library namespace</a>!
+Thus it's not helpful and actually counter-productive to cache
+individual C&nbsp;functions like this:
+</p>
+<pre class="code">
+local <b>funca</b>, <b>funcb</b> = ffi.C.funca, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span>
+local function foo(x, n)
+  for i=1,n do <b>funcb</b>(<b>funca</b>(x, i), 1) end
+end
+</pre>
+<p>
+This turns them into indirect calls and generates bigger and slower
+machine code. Instead you'll want to cache the namespace itself and
+rely on the JIT compiler to eliminate the lookups:
+</p>
+<pre class="code">
+local <b>C</b> = ffi.C          -- <span style="color:#00a000;">Instead use this!</span>
+local function foo(x, n)
+  for i=1,n do <b>C.funcb</b>(<b>C.funca</b>(x, i), 1) end
+end
+</pre>
+<p>
+This generates both shorter and faster code. So <b>don't cache
+C&nbsp;functions</b>, but <b>do</b> cache namespaces! Most often the
+namespace is already in a local variable at an outer scope, e.g. from
+<tt>local&nbsp;lib&nbsp;=&nbsp;ffi.load(...)</tt>. Note that copying
+it to a local variable in the function scope is unnecessary.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 199 - 0
luajit.mod/luajit/doc/ext_jit.html

@@ -0,0 +1,199 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>jit.* Library</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1><tt>jit.*</tt> Library</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a class="current" href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+The functions in this built-in module control the behavior of the JIT
+compiler engine. Note that JIT-compilation is fully automatic &mdash;
+you probably won't need to use any of the following functions unless
+you have special needs.
+</p>
+
+<h3 id="jit_onoff"><tt>jit.on()<br>
+jit.off()</tt></h3>
+<p>
+Turns the whole JIT compiler on (default) or off.
+</p>
+<p>
+These functions are typically used with the command line options
+<tt>-j on</tt> or <tt>-j off</tt>.
+</p>
+
+<h3 id="jit_flush"><tt>jit.flush()</tt></h3>
+<p>
+Flushes the whole cache of compiled code.
+</p>
+
+<h3 id="jit_onoff_func"><tt>jit.on(func|true [,true|false])<br>
+jit.off(func|true [,true|false])<br>
+jit.flush(func|true [,true|false])</tt></h3>
+<p>
+<tt>jit.on</tt> enables JIT compilation for a Lua function (this is
+the default).
+</p>
+<p>
+<tt>jit.off</tt> disables JIT compilation for a Lua function and
+flushes any already compiled code from the code cache.
+</p>
+<p>
+<tt>jit.flush</tt> flushes the code, but doesn't affect the
+enable/disable status.
+</p>
+<p>
+The current function, i.e. the Lua function calling this library
+function, can also be specified by passing <tt>true</tt> as the first
+argument.
+</p>
+<p>
+If the second argument is <tt>true</tt>, JIT compilation is also
+enabled, disabled or flushed recursively for all sub-functions of a
+function. With <tt>false</tt> only the sub-functions are affected.
+</p>
+<p>
+The <tt>jit.on</tt> and <tt>jit.off</tt> functions only set a flag
+which is checked when the function is about to be compiled. They do
+not trigger immediate compilation.
+</p>
+<p>
+Typical usage is <tt>jit.off(true, true)</tt> in the main chunk
+of a module to turn off JIT compilation for the whole module for
+debugging purposes.
+</p>
+
+<h3 id="jit_flush_tr"><tt>jit.flush(tr)</tt></h3>
+<p>
+Flushes the root trace, specified by its number, and all of its side
+traces from the cache. The code for the trace will be retained as long
+as there are any other traces which link to it.
+</p>
+
+<h3 id="jit_status"><tt>status, ... = jit.status()</tt></h3>
+<p>
+Returns the current status of the JIT compiler. The first result is
+either <tt>true</tt> or <tt>false</tt> if the JIT compiler is turned
+on or off. The remaining results are strings for CPU-specific features
+and enabled optimizations.
+</p>
+
+<h3 id="jit_version"><tt>jit.version</tt></h3>
+<p>
+Contains the LuaJIT version string.
+</p>
+
+<h3 id="jit_version_num"><tt>jit.version_num</tt></h3>
+<p>
+Contains the version number of the LuaJIT core. Version xx.yy.zz
+is represented by the decimal number xxyyzz.
+</p>
+
+<h3 id="jit_os"><tt>jit.os</tt></h3>
+<p>
+Contains the target OS name:
+"Windows", "Linux", "OSX", "BSD", "POSIX" or "Other".
+</p>
+
+<h3 id="jit_arch"><tt>jit.arch</tt></h3>
+<p>
+Contains the target architecture name:
+"x86", "x64", "arm", "ppc", "ppcspe", or "mips".
+</p>
+
+<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
+<p>
+This sub-module provides the backend for the <tt>-O</tt> command line
+option.
+</p>
+<p>
+You can also use it programmatically, e.g.:
+</p>
+<pre class="code">
+jit.opt.start(2) -- same as -O2
+jit.opt.start("-dce")
+jit.opt.start("hotloop=10", "hotexit=2")
+</pre>
+<p>
+Unlike in LuaJIT 1.x, the module is built-in and
+<b>optimization is turned on by default!</b>
+It's no longer necessary to run <tt>require("jit.opt").start()</tt>,
+which was one of the ways to enable optimization.
+</p>
+
+<h2 id="jit_util"><tt>jit.util.*</tt> &mdash; JIT compiler introspection</h2>
+<p>
+This sub-module holds functions to introspect the bytecode, generated
+traces, the IR and the generated machine code. The functionality
+provided by this module is still in flux and therefore undocumented.
+</p>
+<p>
+The debug modules <tt>-jbc</tt>, <tt>-jv</tt> and <tt>-jdump</tt> make
+extensive use of these functions. Please check out their source code,
+if you want to know more.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 408 - 0
luajit.mod/luajit/doc/extensions.html

@@ -0,0 +1,408 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Extensions</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.exc {
+  line-height: 1.2;
+}
+tr.exchead td {
+  font-weight: bold;
+}
+td.excplatform {
+  width: 48%;
+}
+td.exccompiler {
+  width: 29%;
+}
+td.excinterop {
+  width: 23%;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Extensions</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a class="current" href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT is fully upwards-compatible with Lua 5.1. It supports all
+<a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">&raquo;</span>&nbsp;standard Lua
+library functions</a> and the full set of
+<a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">&raquo;</span>&nbsp;Lua/C API
+functions</a>.
+</p>
+<p>
+LuaJIT is also fully ABI-compatible to Lua 5.1 at the linker/dynamic
+loader level. This means you can compile a C&nbsp;module against the
+standard Lua headers and load the same shared library from either Lua
+or LuaJIT.
+</p>
+<p>
+LuaJIT extends the standard Lua VM with new functionality and adds
+several extension modules. Please note this page is only about
+<em>functional</em> enhancements and not about performance enhancements,
+such as the optimized VM, the faster interpreter or the JIT compiler.
+</p>
+
+<h2 id="modules">Extensions Modules</h2>
+<p>
+LuaJIT comes with several built-in extension modules:
+</p>
+
+<h3 id="bit"><tt>bit.*</tt> &mdash; Bitwise operations</h3>
+<p>
+LuaJIT supports all bitwise operations as defined by
+<a href="http://bitop.luajit.org"><span class="ext">&raquo;</span>&nbsp;Lua BitOp</a>:
+</p>
+<pre class="code">
+bit.tobit  bit.tohex  bit.bnot    bit.band bit.bor  bit.bxor
+bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
+</pre>
+<p>
+This module is a LuaJIT built-in &mdash; you don't need to download or
+install Lua BitOp. The Lua BitOp site has full documentation for all
+<a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+</p>
+<p>
+Please make sure to <tt>require</tt> the module before using any of
+its functions:
+</p>
+<pre class="code">
+local bit = require("bit")
+</pre>
+<p>
+An already installed Lua BitOp module is ignored by LuaJIT.
+This way you can use bit operations from both Lua and LuaJIT on a
+shared installation.
+</p>
+
+<h3 id="ffi"><tt>ffi.*</tt> &mdash; FFI library</h3>
+<p>
+The <a href="ext_ffi.html">FFI library</a> allows calling external
+C&nbsp;functions and the use of C&nbsp;data structures from pure Lua
+code.
+</p>
+
+<h3 id="jit"><tt>jit.*</tt> &mdash; JIT compiler control</h3>
+<p>
+The functions in this module
+<a href="ext_jit.html">control the behavior of the JIT compiler engine</a>.
+</p>
+
+<h3 id="c_api">C API extensions</h3>
+<p>
+LuaJIT adds some
+<a href="ext_c_api.html">extra functions to the Lua/C API</a>.
+</p>
+
+<h2 id="library">Enhanced Standard Library Functions</h2>
+
+<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
+<p>
+Unlike the standard implementation in Lua 5.1, <tt>xpcall()</tt>
+passes any arguments after the error function to the function
+which is called in a protected context.
+</p>
+
+<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3>
+<p>
+Non-ASCII characters are handled transparently by the Lua source code parser.
+This allows the use of UTF-8 characters in identifiers and strings.
+A UTF-8 BOM is skipped at the start of the source code.
+</p>
+
+<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3>
+<p>
+All number-to-string conversions consistently convert non-finite numbers
+to the same strings on all platforms. NaN results in <tt>"nan"</tt>,
+positive infinity results in <tt>"inf"</tt> and negative infinity results
+in <tt>"-inf"</tt>.
+</p>
+
+<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
+<p>
+All string-to-number conversions consistently convert integer and
+floating-point inputs in decimal and hexadecimal on all platforms.
+<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
+problems with poor C library implementations. The builtin conversion
+function provides full precision according to the IEEE-754 standard, it
+works independently of the current locale and it supports hex floating-point
+numbers (e.g. <tt>0x1.5p-3</tt>).
+</p>
+
+<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3>
+<p>
+An extra argument has been added to <tt>string.dump()</tt>. If set to
+<tt>true</tt>, 'stripped' bytecode without debug information is
+generated. This speeds up later bytecode loading and reduces memory
+usage. See also the
+<a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
+</p>
+<p>
+The generated bytecode is portable and can be loaded on any architecture
+that LuaJIT supports, independent of word size or endianess. However the
+bytecode compatibility versions must match. Bytecode stays compatible
+for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
+minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
+bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
+</p>
+
+<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
+<p>
+LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
+<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
+the PRNG results is much superior compared to the standard Lua
+implementation which uses the platform-specific ANSI rand().
+</p>
+<p>
+The PRNG generates the same sequences from the same seeds on all
+platforms and makes use of all bits in the seed argument.
+<tt>math.random()</tt> without arguments generates 52 pseudo-random bits
+for every call. The result is uniformly distributed between 0.0 and 1.0.
+It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
+preserve uniformity.
+</p>
+
+<h3 id="io"><tt>io.*</tt> functions handle 64&nbsp;bit file offsets</h3>
+<p>
+The file I/O functions in the standard <tt>io.*</tt> library handle
+64&nbsp;bit file offsets. In particular this means it's possible
+to open files larger than 2&nbsp;Gigabytes and to reposition or obtain
+the current file position for offsets beyond 2&nbsp;GB
+(<tt>fp:seek()</tt> method).
+</p>
+
+<h3 id="debug_meta"><tt>debug.*</tt> functions identify metamethods</h3>
+<p>
+<tt>debug.getinfo()</tt> and <tt>lua_getinfo()</tt> also return information
+about invoked metamethods. The <tt>namewhat</tt> field is set to
+<tt>"metamethod"</tt> and the <tt>name</tt> field has the name of
+the corresponding metamethod (e.g. <tt>"__index"</tt>).
+</p>
+
+<h2 id="resumable">Fully Resumable VM</h2>
+<p>
+The LuaJIT VM is fully resumable. This means you can yield from a
+coroutine even across contexts, where this would not possible with
+the standard Lua&nbsp;5.1 VM: e.g. you can yield across <tt>pcall()</tt>
+and <tt>xpcall()</tt>, across iterators and across metamethods.
+</p>
+
+<h2 id="lua52">Extensions from Lua 5.2</h2>
+<p>
+LuaJIT supports some language and library extensions from Lua&nbsp;5.2.
+Features that are unlikely to break existing code are unconditionally
+enabled:
+</p>
+<ul>
+<li><tt>goto</tt> and <tt>::labels::</tt>.</li>
+<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
+<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
+<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
+<li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
+<li><tt>math.log(x [,base])</tt>.
+<li><tt>string.rep(s, n [,sep])</tt>.
+<li><tt>string.format()</tt>: <tt>%q</tt> reversible.
+<tt>%s</tt> checks <tt>__tostring</tt>.
+<tt>%a</tt> and <tt>"%A</tt> added.</li>
+<li>String matching pattern <tt>%g</tt> added.</li>
+<li><tt>io.read("*L")</tt>.</li>
+<li><tt>io.lines()</tt> and <tt>file:lines()</tt> process
+<tt>io.read()</tt> options.</li>
+<li><tt>os.exit(status|true|false [,close])</tt>.</li>
+<li><tt>package.searchpath(name, path [, sep [, rep]])</tt>.</li>
+<li><tt>package.loadlib(name, "*")</tt>.</li>
+<li><tt>debug.getinfo()</tt> returns <tt>nparams</tt> and <tt>isvararg</tt>
+for option <tt>"u"</tt>.</li>
+<li><tt>debug.getlocal()</tt> accepts function instead of level.</li>
+<li><tt>debug.getlocal()</tt> and <tt>debug.setlocal()</tt> accept negative
+indexes for varargs.</li>
+<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
+C&nbsp;functions.</li>
+<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
+<li>Command line option <tt>-E</tt>.</li>
+<li>Command line checks <tt>__tostring</tt> for errors.</li>
+</ul>
+<p>
+Other features are only enabled, if LuaJIT is built with
+<tt>-DLUAJIT_ENABLE_LUA52COMPAT</tt>:
+</p>
+<ul>
+<li><tt>goto</tt> is a keyword and not a valid variable name anymore.</li>
+<li><tt>break</tt> can be placed anywhere. Empty statements (<tt>;;</tt>)
+are allowed.</li>
+<li><tt>__lt</tt>, <tt>__le</tt> are invoked for mixed types.</li>
+<li><tt>__len</tt> for tables. <tt>rawlen()</tt> library function.</li>
+<li><tt>pairs()</tt> and <tt>ipairs()</tt> check for <tt>__pairs</tt> and
+<tt>__ipairs</tt>.</li>
+<li><tt>coroutine.running()</tt> returns two results.</li>
+<li><tt>table.pack()</tt> and <tt>table.unpack()</tt>
+(same as <tt>unpack()</tt>).</li>
+<li><tt>io.write()</tt> and <tt>file:write()</tt> return file handle
+instead of <tt>true</tt>.</li>
+<li><tt>os.execute()</tt> and <tt>pipe:close()</tt> return detailed
+exit status.</li>
+<li><tt>debug.setmetatable()</tt> returns object.</li>
+<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
+<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.
+</ul>
+<p>
+Note: this provides only partial compatibility with Lua 5.2 at the
+language and Lua library level. LuaJIT is API+ABI-compatible with
+Lua&nbsp;5.1, which prevents implementing features that would otherwise
+break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
+</p>
+
+<h2 id="exceptions">C++ Exception Interoperability</h2>
+<p>
+LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
+The available range of features depends on the target platform and
+the toolchain used to compile LuaJIT:
+</p>
+<table class="exc">
+<tr class="exchead">
+<td class="excplatform">Platform</td>
+<td class="exccompiler">Compiler</td>
+<td class="excinterop">Interoperability</td>
+</tr>
+<tr class="odd separate">
+<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
+<td class="exccompiler">GCC 4.3+</td>
+<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+</tr>
+<tr class="even">
+<td class="excplatform">Other platforms, DWARF2 unwinding</td>
+<td class="exccompiler">GCC</td>
+<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
+</tr>
+<tr class="odd">
+<td class="excplatform">Windows/x64</td>
+<td class="exccompiler">MSVC or WinSDK</td>
+<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+</tr>
+<tr class="even">
+<td class="excplatform">Windows/x86</td>
+<td class="exccompiler">Any</td>
+<td class="excinterop"><b style="color: #a00000;">No</b></td>
+</tr>
+<tr class="odd">
+<td class="excplatform">Other platforms</td>
+<td class="exccompiler">Other compilers</td>
+<td class="excinterop"><b style="color: #a00000;">No</b></td>
+</tr>
+</table>
+<p>
+<b style="color: #00a000;">Full interoperability</b> means:
+</p>
+<ul>
+<li>C++&nbsp;exceptions can be caught on the Lua side with <tt>pcall()</tt>,
+<tt>lua_pcall()</tt> etc.</li>
+<li>C++&nbsp;exceptions will be converted to the generic Lua error
+<tt>"C++&nbsp;exception"</tt>, unless you use the
+<a href="ext_c_api.html#mode_wrapcfunc">C&nbsp;call wrapper</a> feature.</li>
+<li>It's safe to throw C++&nbsp;exceptions across non-protected Lua frames
+on the C&nbsp;stack. The contents of the C++&nbsp;exception object
+pass through unmodified.</li>
+<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
+The corresponding Lua error message can be retrieved from the Lua stack.</li>
+<li>Throwing Lua errors across C++ frames is safe. C++ destructors
+will be called.</li>
+</ul>
+<p>
+<b style="color: #c06000;">Limited interoperability</b> means:
+</p>
+<ul>
+<li>C++&nbsp;exceptions can be caught on the Lua side with <tt>pcall()</tt>,
+<tt>lua_pcall()</tt> etc.</li>
+<li>C++&nbsp;exceptions will be converted to the generic Lua error
+<tt>"C++&nbsp;exception"</tt>, unless you use the
+<a href="ext_c_api.html#mode_wrapcfunc">C&nbsp;call wrapper</a> feature.</li>
+<li>C++&nbsp;exceptions will be caught by non-protected Lua frames and
+are rethrown as a generic Lua error. The C++&nbsp;exception object will
+be destroyed.</li>
+<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
+<li>Throwing Lua errors across C++ frames will <b>not</b> call
+C++ destructors.</li>
+</ul>
+
+<p>
+<b style="color: #a00000;">No interoperability</b> means:
+</p>
+<ul>
+<li>It's <b>not</b> safe to throw C++&nbsp;exceptions across Lua frames.</li>
+<li>C++&nbsp;exceptions <b>cannot</b> be caught on the Lua side.</li>
+<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
+<li>Throwing Lua errors across C++ frames will <b>not</b> call
+C++ destructors.</li>
+<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
+it's <b>not</b> safe to throw a Lua error across any frames containing
+a C++ function with any try/catch construct or using variables with
+(implicit) destructors. This also applies to any functions which may be
+inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
+is called inside or outside of a try/catch or whether any object actually
+needs to be destroyed: the SEH chain is corrupted and this will eventually
+lead to the termination of the process.</li>
+</ul>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 184 - 0
luajit.mod/luajit/doc/faq.html

@@ -0,0 +1,184 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Frequently Asked Questions (FAQ)</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+dd { margin-left: 1.5em; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Frequently Asked Questions (FAQ)</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a class="current" href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<dl>
+<dt>Q: Where can I learn more about LuaJIT and Lua?</dt>
+<dd>
+<ul style="padding: 0;">
+<li>The <a href="http://luajit.org/list.html"><span class="ext">&raquo;</span>&nbsp;LuaJIT mailing list</a> focuses on topics
+related to LuaJIT.</li>
+<li>The <a href="http://wiki.luajit.org/"><span class="ext">&raquo;</span>&nbsp;LuaJIT wiki</a> gathers community
+resources about LuaJIT.</li>
+<li>News about Lua itself can be found at the
+<a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>.
+The mailing list archives are worth checking out for older postings
+about LuaJIT.</li>
+<li>The <a href="http://lua.org"><span class="ext">&raquo;</span>&nbsp;main Lua.org site</a> has complete
+<a href="http://www.lua.org/docs.html"><span class="ext">&raquo;</span>&nbsp;documentation</a> of the language
+and links to books and papers about Lua.</li>
+<li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">&raquo;</span>&nbsp;Lua Wiki</a>
+has information about diverse topics.</li>
+</ul>
+</dl>
+
+<dl>
+<dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt>
+<dd>
+I'm planning to write more documentation about the internals of LuaJIT.
+In the meantime, please use the following Google Scholar searches
+to find relevant papers:<br>
+Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">&raquo;</span>&nbsp;Trace Compiler</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">&raquo;</span>&nbsp;JIT Compiler</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">&raquo;</span>&nbsp;Dynamic Language Optimizations</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">&raquo;</span>&nbsp;SSA Form</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">&raquo;</span>&nbsp;Linear Scan Register Allocation</a><br>
+Here is a list of the <a href="http://article.gmane.org/gmane.comp.lang.lua.general/58908"><span class="ext">&raquo;</span>&nbsp;innovative features in LuaJIT</a>.<br>
+And, you know, reading the source is of course the only way to enlightenment. :-)
+</dd>
+</dl>
+
+<dl>
+<dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br>
+Q: My vararg functions fail after switching to LuaJIT!</dt>
+<dd>LuaJIT is compatible to the Lua 5.1 language standard. It doesn't
+support the implicit <tt>arg</tt> parameter for old-style vararg
+functions from Lua 5.0.<br>Please convert your code to the
+<a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">&raquo;</span>&nbsp;Lua 5.1
+vararg syntax</a>.</dd>
+</dl>
+
+<dl>
+<dt>Q: Why do I get this error: "bad FPU precision"?<br>
+<dt>Q: I get weird behavior after initializing Direct3D.<br>
+<dt>Q: Some FPU operations crash after I load a Delphi DLL.<br>
+</dt>
+<dd>
+
+DirectX/Direct3D (up to version 9) sets the x87 FPU to single-precision
+mode by default. This violates the Windows ABI and interferes with the
+operation of many programs &mdash; LuaJIT is affected, too. Please make
+sure you always use the <tt>D3DCREATE_FPU_PRESERVE</tt> flag when
+initializing Direct3D.<br>
+
+Direct3D version 10 or higher do not show this behavior anymore.
+Consider testing your application with older versions, too.<br>
+
+Similarly, the Borland/Delphi runtime modifies the FPU control word and
+enables FP exceptions. Of course this violates the Windows ABI, too.
+Please check the Delphi docs for the Set8087CW method.
+
+</dl>
+
+<dl>
+<dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt>
+<dd>The interrupt signal handler sets a Lua debug hook. But this is
+currently ignored by compiled code (this will eventually be fixed). If
+your program is running in a tight loop and never falls back to the
+interpreter, the debug hook never runs and can't throw the
+"interrupted!" error.<br> In the meantime you have to press Ctrl-C
+twice to get stop your program. That's similar to when it's stuck
+running inside a C function under the Lua interpreter.</dd>
+</dl>
+
+<dl>
+<dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt>
+<dd>Because it's a completely redesigned VM and has very little code
+in common with Lua anymore. Also, if the patch introduces changes to
+the Lua semantics, these would need to be reflected everywhere in the
+VM, from the interpreter up to all stages of the compiler.<br> Please
+use only standard Lua language constructs. For many common needs you
+can use source transformations or use wrapper or proxy functions.
+The compiler will happily optimize away such indirections.</dd>
+</dl>
+
+<dl>
+<dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt>
+<dd>Because it's a compiler &mdash; it needs to generate native
+machine code. This means the code generator must be ported to each
+architecture. And the fast interpreter is written in assembler and
+must be ported, too. This is quite an undertaking.<br>
+The <a href="install.html">install documentation</a> shows the supported
+architectures. Other architectures will follow based on sufficient user
+demand and/or sponsoring.</dd>
+</dl>
+
+<dl>
+<dt>Q: When will feature X be added? When will the next version be released?</dt>
+<dd>When it's ready.<br>
+C'mon, it's open source &mdash; I'm doing it on my own time and you're
+getting it for free. You can either contribute a patch or sponsor
+the development of certain features, if they are important to you.
+</dd>
+</dl>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

BIN
luajit.mod/luajit/doc/img/contact.png


+ 646 - 0
luajit.mod/luajit/doc/install.html

@@ -0,0 +1,646 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Installation</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.compat {
+  line-height: 1.2;
+  font-size: 80%;
+}
+table.compat td {
+  border: 1px solid #bfcfff;
+  height: 2.5em;
+}
+table.compat tr.compathead td {
+  font-weight: bold;
+  border-bottom: 2px solid #bfcfff;
+}
+tr.compathead td.compatos {
+  vertical-align: top;
+}
+table.compat td.compatcpu {
+  width: 18%;
+  border-right: 2px solid #bfcfff;
+}
+td.compatos {
+  width: 21%;
+  vertical-align: middle;
+}
+td.compatno {
+  background-color: #d0d0d0;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Installation</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a class="current" href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT is only distributed as a source package. This page explains
+how to build and install LuaJIT with different operating systems
+and C&nbsp;compilers.
+</p>
+<p>
+For the impatient (on POSIX systems):
+</p>
+<pre class="code">
+make &amp;&amp; sudo make install
+</pre>
+<p>
+LuaJIT currently builds out-of-the box on most systems.
+Here's the compatibility matrix for the supported combinations of
+operating systems, CPUs and compilers:
+</p>
+<table class="compat">
+<tr class="compathead">
+<td class="compatcpu">CPU / OS</td>
+<td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td>
+<td class="compatos"><a href="#posix">*BSD, Other</a></td>
+<td class="compatos"><a href="#posix">OSX 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
+<td class="compatos"><a href="#windows">Windows<br>XP/Vista/7</a></td>
+</tr>
+<tr class="odd separate">
+<td class="compatcpu">x86 (32 bit)</td>
+<td class="compatos">GCC 4.x<br>GCC 3.4</td>
+<td class="compatos">GCC 4.x<br>GCC 3.4</td>
+<td class="compatos">GCC 4.x<br>GCC 3.4</td>
+<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
+</tr>
+<tr class="even">
+<td class="compatcpu">x64 (64 bit)</td>
+<td class="compatos">GCC 4.x</td>
+<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
+<td class="compatos">GCC 4.x</td>
+<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0</td>
+</tr>
+<tr class="odd">
+<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos compatno">&nbsp;</td>
+</tr>
+<tr class="even">
+<td class="compatcpu"><a href="#cross2">PPC</a></td>
+<td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
+</tr>
+<tr class="odd">
+<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>
+<td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos compatno">&nbsp;</td>
+</tr>
+<tr class="even">
+<td class="compatcpu"><a href="#cross2">MIPS</a></td>
+<td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos compatno">&nbsp;</td>
+</tr>
+</table>
+
+<h2>Configuring LuaJIT</h2>
+<p>
+The standard configuration should work fine for most installations.
+Usually there is no need to tweak the settings. The following files
+hold all user-configurable settings:
+</p>
+<ul>
+<li><tt>src/luaconf.h</tt> sets some configuration variables.</li>
+<li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX
+only).</li>
+<li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
+under POSIX, MinGW or Cygwin.</li>
+<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
+MSVC or WinSDK.</li>
+</ul>
+<p>
+Please read the instructions given in these files, before changing
+any settings.
+</p>
+
+<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
+<h3>Prerequisites</h3>
+<p>
+Depending on your distribution, you may need to install a package for
+GCC, the development headers and/or a complete SDK. E.g. on a current
+Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager.
+</p>
+<p>
+Download the current source package of LuaJIT (pick the .tar.gz),
+if you haven't already done so. Move it to a directory of your choice,
+open a terminal window and change to this directory. Now unpack the archive
+and change to the newly created directory:
+</p>
+<pre class="code">
+tar zxf LuaJIT-2.0.4.tar.gz
+cd LuaJIT-2.0.4</pre>
+<h3>Building LuaJIT</h3>
+<p>
+The supplied Makefiles try to auto-detect the settings needed for your
+operating system and your compiler. They need to be run with GNU Make,
+which is probably the default on your system, anyway. Simply run:
+</p>
+<pre class="code">
+make
+</pre>
+<p>
+This always builds a native x86, x64 or PPC binary, depending on the host OS
+you're running this command on. Check the section on
+<a href="#cross">cross-compilation</a> for more options.
+</p>
+<p>
+By default, modules are only searched under the prefix <tt>/usr/local</tt>.
+You can add an extra prefix to the search paths by appending the
+<tt>PREFIX</tt> option, e.g.:
+</p>
+<pre class="code">
+make PREFIX=/home/myself/lj2
+</pre>
+<p>
+Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment
+variable is not set, then it's forced to <tt>10.4</tt>.
+</p>
+<h3>Installing LuaJIT</h3>
+<p>
+The top-level Makefile installs LuaJIT by default under
+<tt>/usr/local</tt>, i.e. the executable ends up in
+<tt>/usr/local/bin</tt> and so on. You need root privileges
+to write to this path. So, assuming sudo is installed on your system,
+run the following command and enter your sudo password:
+</p>
+<pre class="code">
+sudo make install
+</pre>
+<p>
+Otherwise specify the directory prefix as an absolute path, e.g.:
+</p>
+<pre class="code">
+make install PREFIX=/home/myself/lj2
+</pre>
+<p>
+Obviously the prefixes given during build and installation need to be the same.
+</p>
+
+<h2 id="windows">Windows Systems</h2>
+<h3>Prerequisites</h3>
+<p>
+Either install one of the open source SDKs
+(<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
+<a href="http://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>), which come with a modified
+GCC plus the required development headers.
+</p>
+<p>
+Or install Microsoft's Visual C++ (MSVC). The freely downloadable
+<a href="http://www.microsoft.com/Express/VC/"><span class="ext">&raquo;</span>&nbsp;Express Edition</a>
+works just fine, but only contains an x86 compiler.
+</p>
+<p>
+The freely downloadable
+<a href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx"><span class="ext">&raquo;</span>&nbsp;Windows SDK</a>
+only comes with command line tools, but this is all you need to build LuaJIT.
+It contains x86 and x64 compilers.
+</p>
+<p>
+Next, download the source package and unpack it using an archive manager
+(e.g. the Windows Explorer) to a directory of your choice.
+</p>
+<h3>Building with MSVC</h3>
+<p>
+Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the
+directory where you've unpacked the sources and run these commands:
+</p>
+<pre class="code">
+cd src
+msvcbuild
+</pre>
+<p>
+Then follow the installation instructions below.
+</p>
+<h3>Building with the Windows SDK</h3>
+<p>
+Open a "Windows SDK Command Shell" and select the x86 compiler:
+</p>
+<pre class="code">
+setenv /release /x86
+</pre>
+<p>
+Or select the x64 compiler:
+</p>
+<pre class="code">
+setenv /release /x64
+</pre>
+<p>
+Then <tt>cd</tt> to the directory where you've unpacked the sources
+and run these commands:
+</p>
+<pre class="code">
+cd src
+msvcbuild
+</pre>
+<p>
+Then follow the installation instructions below.
+</p>
+<h3>Building with MinGW or Cygwin</h3>
+<p>
+Open a command prompt window and make sure the MinGW or Cygwin programs
+are in your path. Then <tt>cd</tt> to the directory where
+you've unpacked the sources and run this command for MinGW:
+</p>
+<pre class="code">
+mingw32-make
+</pre>
+<p>
+Or this command for Cygwin:
+</p>
+<pre class="code">
+make
+</pre>
+<p>
+Then follow the installation instructions below.
+</p>
+<h3>Installing LuaJIT</h3>
+<p>
+Copy <tt>luajit.exe</tt> and <tt>lua51.dll</tt> (built in the <tt>src</tt>
+directory) to a newly created directory (any location is ok).
+Add <tt>lua</tt> and <tt>lua\jit</tt> directories below it and copy
+all Lua files from the <tt>src\jit</tt> directory of the distribution
+to the latter directory.
+</p>
+<p>
+There are no hardcoded
+absolute path names &mdash; all modules are loaded relative to the
+directory where <tt>luajit.exe</tt> is installed
+(see <tt>src/luaconf.h</tt>).
+</p>
+
+<h2 id="cross">Cross-compiling LuaJIT</h2>
+<p>
+The GNU Makefile-based build system allows cross-compiling on any host
+for any supported target, as long as both architectures have the same
+pointer size. If you want to cross-compile to any 32 bit target on an
+x64 OS, you need to install the multilib development package (e.g.
+<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
+(<tt>HOST_CC="gcc -m32"</tt>).
+</p>
+<p>
+You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
+target OS differ, or you'll get assembler or linker errors. E.g. if
+you're compiling on a Windows or OSX host for embedded Linux or Android,
+you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
+minimal target OS, you may need to disable the built-in allocator in
+<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. The examples
+below only show some popular targets &mdash; please check the comments
+in <tt>src/Makefile</tt> for more details.
+</p>
+<pre class="code">
+# Cross-compile to a 32 bit binary on a multilib x64 OS
+make CC="gcc -m32"
+
+# Cross-compile on Debian/Ubuntu for Windows (mingw32 package)
+make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
+</pre>
+<p id="cross2">
+The <tt>CROSS</tt> prefix allows specifying a standard GNU cross-compile
+toolchain (Binutils, GCC and a matching libc). The prefix may vary
+depending on the <tt>--target</tt> the toolchain was built for (note the
+<tt>CROSS</tt> prefix has a trailing <tt>"-"</tt>). The examples below
+use the canonical toolchain triplets for Linux.
+</p>
+<p>
+Since there's often no easy way to detect CPU features at runtime, it's
+important to compile with the proper CPU or architecture settings. You
+can specify these when building the toolchain yourself. Or add
+<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For
+ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,
+too. Otherwise LuaJIT may not run at the full performance of your target
+CPU.
+</p>
+<pre class="code">
+# ARM soft-float
+make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
+     TARGET_CFLAGS="-mfloat-abi=soft"
+
+# ARM soft-float ABI with VFP (example for Cortex-A8)
+make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
+     TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
+
+# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
+make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
+
+# PPC
+make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
+# PPC/e500v2 (fast interpreter only)
+make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
+
+# MIPS big-endian
+make HOST_CC="gcc -m32" CROSS=mips-linux-
+# MIPS little-endian
+make HOST_CC="gcc -m32" CROSS=mipsel-linux-
+</pre>
+<p>
+You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
+The environment variables need to match the install locations and the
+desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14.
+For details check the folder <tt>docs</tt> in the NDK directory.
+</p>
+<p>
+Only a few common variations for the different CPUs, ABIs and platforms
+are listed. Please use your own judgement for which combination you want
+to build/deploy or which lowest common denominator you want to pick:
+</p>
+<pre class="code">
+# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
+NDK=/opt/android/ndk
+NDKABI=8
+NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
+NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
+NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
+make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+
+# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
+NDK=/opt/android/ndk
+NDKABI=14
+NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
+NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
+NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
+NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
+make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
+
+# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS)
+NDK=/opt/android/ndk
+NDKABI=14
+NDKVER=$NDK/toolchains/mipsel-linux-android-4.6
+NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
+NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
+make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+
+# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
+NDK=/opt/android/ndk
+NDKABI=14
+NDKVER=$NDK/toolchains/x86-4.6
+NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
+NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
+make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+</pre>
+<p>
+You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>.
+The environment variables need to match the iOS SDK version:
+</p>
+<p style="font-size: 8pt;">
+Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
+are not allowed to generate code at runtime. You'll only get the performance
+of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but
+much slower than the JIT compiler. Please complain to Apple, not me.
+Or use Android. :-p
+</p>
+<pre class="code">
+IXCODE=`xcode-select -print-path`
+ISDK=$IXCODE/Platforms/iPhoneOS.platform/Developer
+ISDKVER=iPhoneOS6.0.sdk
+ISDKP=$ISDK/usr/bin/
+ISDKF="-arch armv7 -isysroot $ISDK/SDKs/$ISDKVER"
+make HOST_CC="gcc -m32 -arch i386" CROSS=$ISDKP TARGET_FLAGS="$ISDKF" \
+     TARGET_SYS=iOS
+</pre>
+
+<h3 id="consoles">Cross-compiling for consoles</h3>
+<p>
+Building LuaJIT for consoles requires both a supported host compiler
+(x86 or x64) and a cross-compiler (to PPC or ARM) from the official
+console SDK.
+</p>
+<p>
+Due to restrictions on consoles, the JIT compiler is disabled and only
+the fast interpreter is built. This is still faster than plain Lua,
+but much slower than the JIT compiler. The FFI is disabled, too, since
+it's not very useful in such an environment.
+</p>
+<p>
+The following commands build a static library <tt>libluajit.a</tt>,
+which can be linked against your game, just like the Lua library.
+</p>
+<p>
+To cross-compile for <b id="ps3">PS3</b> from a Linux host (requires
+32&nbsp;bit GCC, i.e. multilib Linux/x64) or a Windows host (requires
+32&nbsp;bit MinGW), run this command:
+</p>
+<pre class="code">
+make HOST_CC="gcc -m32" CROSS=ppu-lv2-
+</pre>
+<p>
+To cross-compile for <b id="ps4">PS4</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and
+run the following commands:
+</p>
+<pre class="code">
+cd src
+ps4build
+</pre>
+<p>
+To cross-compile for <b id="psvita">PS Vita</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and
+run the following commands:
+</p>
+<pre class="code">
+cd src
+psvitabuild
+</pre>
+<p>
+To cross-compile for <b id="xbox360">Xbox 360</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and run
+the following commands:
+</p>
+<pre class="code">
+cd src
+xedkbuild
+</pre>
+
+<h2 id="embed">Embedding LuaJIT</h2>
+<p>
+LuaJIT is API-compatible with Lua 5.1. If you've already embedded Lua
+into your application, you probably don't need to do anything to switch
+to LuaJIT, except link with a different library:
+</p>
+<ul>
+<li>It's strongly suggested to build LuaJIT separately using the supplied
+build system. Please do <em>not</em> attempt to integrate the individual
+source files into your build tree. You'll most likely get the internal build
+dependencies wrong or mess up the compiler flags. Treat LuaJIT like any
+other external library and link your application with either the dynamic
+or static library, depending on your needs.</li>
+<li>If you want to load C modules compiled for plain Lua
+with <tt>require()</tt>, you need to make sure the public symbols
+(e.g. <tt>lua_pushnumber</tt>) are exported, too:
+<ul><li>On POSIX systems you can either link to the shared library
+or link the static library into your application. In the latter case
+you'll need to export all public symbols from your main executable
+(e.g. <tt>-Wl,-E</tt> on Linux) and add the external dependencies
+(e.g. <tt>-lm -ldl</tt> on Linux).</li>
+<li>Since Windows symbols are bound to a specific DLL name, you need to
+link to the <tt>lua51.dll</tt> created by the LuaJIT build (do not rename
+the DLL). You may link LuaJIT statically on Windows only if you don't
+intend to load Lua/C modules at runtime.
+</li></ul>
+</li>
+<li>
+If you're building a 64 bit application on OSX which links directly or
+indirectly against LuaJIT, you need to link your main executable
+with these flags:
+<pre class="code">
+-pagezero_size 10000 -image_base 100000000
+</pre>
+Also, it's recommended to <tt>rebase</tt> all (self-compiled) shared libraries
+which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua).
+See: <tt>man rebase</tt>
+</li>
+</ul>
+<p>Additional hints for initializing LuaJIT using the C API functions:</p>
+<ul>
+<li>Here's a
+<a href="http://lua-users.org/wiki/SimpleLuaApiExample"><span class="ext">&raquo;</span>&nbsp;simple example</a>
+for embedding Lua or LuaJIT into your application.</li>
+<li>Make sure you use <tt>luaL_newstate</tt>. Avoid using
+<tt>lua_newstate</tt>, since this uses the (slower) default memory
+allocator from your system (no support for this on x64).</li>
+<li>Make sure you use <tt>luaL_openlibs</tt> and not the old Lua 5.0 style
+of calling <tt>luaopen_base</tt> etc. directly.</li>
+<li>To change or extend the list of standard libraries to load, copy
+<tt>src/lib_init.c</tt> to your project and modify it accordingly.
+Make sure the <tt>jit</tt> library is loaded or the JIT compiler
+will not be activated.</li>
+<li>The <tt>bit.*</tt> module for bitwise operations
+is already built-in. There's no need to statically link
+<a href="http://bitop.luajit.org/"><span class="ext">&raquo;</span>&nbsp;Lua BitOp</a> to your application.</li>
+</ul>
+
+<h2 id="distro">Hints for Distribution Maintainers</h2>
+<p>
+The LuaJIT build system has extra provisions for the needs of most
+POSIX-based distributions. If you're a package maintainer for
+a distribution, <em>please</em> make use of these features and
+avoid patching, subverting, autotoolizing or messing up the build system
+in unspeakable ways.
+</p>
+<p>
+There should be absolutely no need to patch <tt>luaconf.h</tt> or any
+of the Makefiles. And please do not hand-pick files for your packages &mdash;
+simply use whatever <tt>make install</tt> creates. There's a reason
+for all of the files <em>and</em> directories it creates.
+</p>
+<p>
+The build system uses GNU make and auto-detects most settings based on
+the host you're building it on. This should work fine for native builds,
+even when sandboxed. You may need to pass some of the following flags to
+<em>both</em> the <tt>make</tt> and the <tt>make install</tt> command lines
+for a regular distribution build:
+</p>
+<ul>
+<li><tt>PREFIX</tt> overrides the installation path and should usually
+be set to <tt>/usr</tt>. Setting this also changes the module paths and
+the paths needed to locate the shared library.</li>
+<li><tt>DESTDIR</tt> is an absolute path which allows you to install
+to a shadow tree instead of the root tree of the build system.</li>
+<li><tt>MULTILIB</tt> sets the architecture-specific library path component
+for multilib systems. The default is <tt>lib</tt>.</li>
+<li>Have a look at the top-level <tt>Makefile</tt> and <tt>src/Makefile</tt>
+for additional variables to tweak. The following variables <em>may</em> be
+overridden, but it's <em>not</em> recommended, except for special needs
+like cross-builds:
+<tt>BUILDMODE, CC, HOST_CC, STATIC_CC, DYNAMIC_CC, CFLAGS, HOST_CFLAGS,
+TARGET_CFLAGS, LDFLAGS, HOST_LDFLAGS, TARGET_LDFLAGS, TARGET_SHLDFLAGS,
+TARGET_FLAGS, LIBS, HOST_LIBS, TARGET_LIBS, CROSS, HOST_SYS, TARGET_SYS
+</tt></li>
+</ul>
+<p>
+The build system has a special target for an amalgamated build, i.e.
+<tt>make amalg</tt>. This compiles the LuaJIT core as one huge C file
+and allows GCC to generate faster and shorter code. Alas, this requires
+lots of memory during the build. This may be a problem for some users,
+that's why it's not enabled by default. But it shouldn't be a problem for
+most build farms. It's recommended that binary distributions use this
+target for their LuaJIT builds.
+</p>
+<p>
+The tl;dr version of the above:
+</p>
+<pre class="code">
+make amalg PREFIX=/usr && \
+make install PREFIX=/usr DESTDIR=/tmp/buildroot
+</pre>
+<p>
+Finally, if you encounter any difficulties, please
+<a href="contact.html">contact me</a> first, instead of releasing a broken
+package onto unsuspecting users. Because they'll usually gonna complain
+to me (the upstream) and not you (the package maintainer), anyway.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 234 - 0
luajit.mod/luajit/doc/luajit.html

@@ -0,0 +1,234 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>LuaJIT</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<meta name="description" content="LuaJIT is a Just-In-Time (JIT) compiler for the Lua language.">
+<style type="text/css">
+table.feature {
+  width: inherit;
+  line-height: 1.2;
+  margin: 0;
+}
+table.feature td {
+  width: 80px;
+  height: 40px;
+  vertical-align: middle;
+  text-align: center;
+  font-weight: bold;
+  border: 4px solid #e6ecff;
+  border-radius: 12px;
+}
+table.os td {
+  background: #7080d0;
+  background-image: linear-gradient(#4060c0 10%, #b0b0ff 95%);
+  background-image: -moz-linear-gradient(#4060c0 10%, #b0b0ff 95%);
+  background-image: -webkit-linear-gradient(#4060c0 10%, #b0b0ff 95%);
+  background-image: -o-linear-gradient(#4060c0 10%, #b0b0ff 95%);
+  background-image: -ms-linear-gradient(#4060c0 10%, #b0b0ff 95%);
+}
+table.os1 td {
+  color: #ffff80;
+}
+table.os2 td {
+  color: #ffa040;
+}
+table.os3 td {
+  color: #40ffff;
+}
+table.compiler td {
+  color: #2080ff;
+  background: #62bf41;
+  background-image: linear-gradient(#62bf41 10%, #b0ffb0 95%);
+  background-image: -moz-linear-gradient(#62bf41 10%, #b0ffb0 95%);
+  background-image: -webkit-linear-gradient(#62bf41 10%, #b0ffb0 95%);
+  background-image: -o-linear-gradient(#62bf41 10%, #b0ffb0 95%);
+  background-image: -ms-linear-gradient(#62bf41 10%, #b0ffb0 95%);
+}
+table.cpu td {
+  color: #ffff00;
+  background: #cf7251;
+  background-image: linear-gradient(#bf6241 10%, #ffb0b0 95%);
+  background-image: -moz-linear-gradient(#bf6241 10%, #ffb0b0 95%);
+  background-image: -webkit-linear-gradient(#bf6241 10%, #ffb0b0 95%);
+  background-image: -o-linear-gradient(#bf6241 10%, #ffb0b0 95%);
+  background-image: -ms-linear-gradient(#bf6241 10%, #ffb0b0 95%);
+}
+table.fcompat td {
+  color: #2060e0;
+  background: #61cfcf;
+  background-image: linear-gradient(#41bfbf 10%, #b0ffff 95%);
+  background-image: -moz-linear-gradient(#41bfbf 10%, #b0ffff 95%);
+  background-image: -webkit-linear-gradient(#41bfbf 10%, #b0ffff 95%);
+  background-image: -o-linear-gradient(#41bfbf 10%, #b0ffff 95%);
+  background-image: -ms-linear-gradient(#41bfbf 10%, #b0ffff 95%);
+}
+table.stats td {
+  color: #ffffff;
+  background: #a0a0a0;
+  background-image: linear-gradient(#808080 10%, #d0d0d0 95%);
+  background-image: -moz-linear-gradient(#808080 10%, #d0d0d0 95%);
+  background-image: -webkit-linear-gradient(#808080 10%, #d0d0d0 95%);
+  background-image: -o-linear-gradient(#808080 10%, #d0d0d0 95%);
+  background-image: -ms-linear-gradient(#808080 10%, #d0d0d0 95%);
+}
+table.stats td.speed {
+  color: #ff4020;
+}
+table.stats td.kb {
+  color: #ffff80;
+  background: #808080;
+  background-image: linear-gradient(#606060 10%, #c0c0c0 95%);
+  background-image: -moz-linear-gradient(#606060 10%, #c0c0c0 95%);
+  background-image: -webkit-linear-gradient(#606060 10%, #c0c0c0 95%);
+  background-image: -o-linear-gradient(#606060 10%, #c0c0c0 95%);
+  background-image: -ms-linear-gradient(#606060 10%, #c0c0c0 95%);
+}
+table.feature small {
+  font-size: 50%;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>LuaJIT</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a class="current" href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT is a <b>Just-In-Time Compiler</b> (JIT) for the
+<a href="http://www.lua.org/"><span class="ext">&raquo;</span>&nbsp;Lua</a> programming language.
+Lua is a powerful, dynamic and light-weight programming language.
+It may be embedded or used as a general-purpose, stand-alone language.
+</p>
+<p>
+LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
+<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
+</p>
+<p>
+</p>
+
+<h2>Compatibility</h2>
+<table class="feature os os1">
+<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr>
+</table>
+<table class="feature os os2">
+<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
+</table>
+<table class="feature os os3">
+<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr>
+</table>
+<table class="feature compiler">
+<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
+</table>
+<table class="feature cpu">
+<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+</table>
+<table class="feature fcompat">
+<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
+</table>
+
+<h2>Overview</h2>
+<table class="feature stats">
+<tr>
+<td class="speed">3x<br>-&nbsp;&nbsp;100x</td>
+<td class="kb">115&nbsp;<small>KB</small><br>VM</td>
+<td class="kb">90&nbsp;<small>KB</small><br>JIT</td>
+<td class="kloc">63&nbsp;<small>KLOC</small><br>C</td>
+<td class="kloc">24&nbsp;<small>KLOC</small><br>ASM</td>
+<td class="kloc">11&nbsp;<small>KLOC</small><br>Lua</td>
+</tr>
+</table>
+<p style="margin-top: 1em;">
+LuaJIT has been successfully used as a <b>scripting middleware</b> in
+games, appliances, network and graphics apps, numerical simulations,
+trading platforms and many other specialty applications. It scales from
+embedded devices, smartphones, desktops up to server farms. It combines
+high flexibility with <a href="http://luajit.org/performance.html"><span class="ext">&raquo;</span>&nbsp;high performance</a>
+and an unmatched <b>low memory footprint</b>.
+</p>
+<p>
+LuaJIT has been in continuous development since 2005. It's widely
+considered to be <b>one of the fastest dynamic language
+implementations</b>. It has outperformed other dynamic languages on many
+cross-language benchmarks since its first release &mdash; often by a
+substantial margin.
+</p>
+<p>
+For <b>LuaJIT 2.0</b>, the whole VM has been rewritten from the ground up
+and relentlessly optimized for performance. It combines a <b>high-speed
+interpreter</b>, written in assembler, with a <b>state-of-the-art JIT
+compiler</b>.
+</p>
+<p>
+An innovative <b>trace compiler</b> is integrated with advanced,
+SSA-based optimizations and highly tuned code generation backends.
+A substantial reduction of the overhead associated with dynamic languages
+allows it to break into the performance range traditionally reserved for
+offline, static language compilers.
+</p>
+
+<h2>More ...</h2>
+<p>
+Please select a sub-topic in the navigation bar to learn more about LuaJIT.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 306 - 0
luajit.mod/luajit/doc/running.html

@@ -0,0 +1,306 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Running LuaJIT</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.opt {
+  line-height: 1.2;
+}
+tr.opthead td {
+  font-weight: bold;
+}
+td.flag_name {
+  width: 4em;
+}
+td.flag_level {
+  width: 2em;
+  text-align: center;
+}
+td.param_name {
+  width: 6em;
+}
+td.param_default {
+  width: 4em;
+  text-align: right;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Running LuaJIT</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a class="current" href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on
+POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple
+Lua statements or whole Lua applications from the command line. It has an
+interactive mode, too.
+</p>
+
+<h2 id="options">Command Line Options</h2>
+<p>
+The <tt>luajit</tt> stand-alone executable is just a slightly modified
+version of the regular <tt>lua</tt> stand-alone executable.
+It supports the same basic options, too. <tt>luajit&nbsp;-h</tt>
+prints a short list of the available options. Please have a look at the
+<a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">&raquo;</span>&nbsp;Lua manual</a>
+for details.
+</p>
+<p>
+LuaJIT has some additional options:
+</p>
+
+<h3 id="opt_b"><tt>-b[options] input output</tt></h3>
+<p>
+This option saves or lists bytecode. The following additional options
+are accepted:
+</p>
+<ul>
+<li><tt>-l</tt> &mdash; Only list bytecode.</li>
+<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li>
+<li><tt>-g</tt> &mdash; Keep debug info.</li>
+<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li>
+<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
+<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
+<li><tt>-o os</tt> &mdash; Override OS for object files (default: native).</li>
+<li><tt>-e chunk</tt> &mdash; Use chunk string as input.</li>
+<li><tt>-</tt> (a single minus sign) &mdash; Use stdin as input and/or stdout as output.</li>
+</ul>
+<p>
+The output file type is auto-detected from the extension of the output
+file name:
+</p>
+<ul>
+<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
+<li><tt>h</tt> &mdash; C header file, static bytecode data.</li>
+<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
+(OS- and architecture-specific).</li>
+<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
+</ul>
+<p>
+Notes:
+</p>
+<ul>
+<li>See also <a href="extensions.html#string_dump">string.dump()</a>
+for information on bytecode portability and compatibility.</li>
+<li>A file in raw bytecode format is auto-detected and can be loaded like
+any Lua source file. E.g. directly from the command line or with
+<tt>loadfile()</tt>, <tt>dofile()</tt> etc.</li>
+<li>To statically embed the bytecode of a module in your application,
+generate an object file and just link it with your application.</li>
+<li>On most ELF-based systems (e.g. Linux) you need to explicitly export the
+global symbols when linking your application, e.g. with: <tt>-Wl,-E</tt></li>
+<li><tt>require()</tt> tries to load embedded bytecode data from exported
+symbols (in <tt>*.exe</tt> or <tt>lua51.dll</tt> on Windows) and from
+shared libraries in <tt>package.cpath</tt>.</li>
+</ul>
+<p>
+Typical usage examples:
+</p>
+<pre class="code">
+luajit -b test.lua test.out                 # Save bytecode to test.out
+luajit -bg test.lua test.out                # Keep debug info
+luajit -be "print('hello world')" test.out  # Save cmdline script
+
+luajit -bl test.lua                         # List to stdout
+luajit -bl test.lua test.txt                # List to test.txt
+luajit -ble "print('hello world')"          # List cmdline script
+
+luajit -b test.lua test.obj                 # Generate object file
+# Link test.obj with your application and load it with require("test")
+</pre>
+
+<h3 id="opt_j"><tt>-j cmd[=arg[,arg...]]</tt></h3>
+<p>
+This option performs a LuaJIT control command or activates one of the
+loadable extension modules. The command is first looked up in the
+<tt>jit.*</tt> library. If no matching function is found, a module
+named <tt>jit.&lt;cmd&gt;</tt> is loaded and the <tt>start()</tt>
+function of the module is called with the specified arguments (if
+any). The space between <tt>-j</tt> and <tt>cmd</tt> is optional.
+</p>
+<p>
+Here are the available LuaJIT control commands:
+</p>
+<ul>
+<li id="j_on"><tt>-jon</tt> &mdash; Turns the JIT compiler on (default).</li>
+<li id="j_off"><tt>-joff</tt> &mdash; Turns the JIT compiler off (only use the interpreter).</li>
+<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
+<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
+<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+</ul>
+<p>
+The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
+written in Lua. They are mainly used for debugging the JIT compiler
+itself. For a description of their options and output format, please
+read the comment block at the start of their source.
+They can be found in the <tt>lib</tt> directory of the source
+distribution or installed under the <tt>jit</tt> directory. By default
+this is <tt>/usr/local/share/luajit-2.0.4/jit</tt> on POSIX
+systems.
+</p>
+
+<h3 id="opt_O"><tt>-O[level]</tt><br>
+<tt>-O[+]flag</tt>&nbsp;&nbsp;&nbsp;<tt>-O-flag</tt><br>
+<tt>-Oparam=value</tt></h3>
+<p>
+This options allows fine-tuned control of the optimizations used by
+the JIT compiler. This is mainly intended for debugging LuaJIT itself.
+Please note that the JIT compiler is extremely fast (we are talking
+about the microsecond to millisecond range). Disabling optimizations
+doesn't have any visible impact on its overhead, but usually generates
+code that runs slower.
+</p>
+<p>
+The first form sets an optimization level &mdash; this enables a
+specific mix of optimization flags. <tt>-O0</tt> turns off all
+optimizations and higher numbers enable more optimizations. Omitting
+the level (i.e. just <tt>-O</tt>) sets the default optimization level,
+which is <tt>-O3</tt> in the current version.
+</p>
+<p>
+The second form adds or removes individual optimization flags.
+The third form sets a parameter for the VM or the JIT compiler
+to a specific value.
+</p>
+<p>
+You can either use this option multiple times (like <tt>-Ocse
+-O-dce -Ohotloop=10</tt>) or separate several settings with a comma
+(like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from
+left to right and later settings override earlier ones. You can freely
+mix the three forms, but note that setting an optimization level
+overrides all earlier flags.
+</p>
+<p>
+Here are the available flags and at what optimization levels they
+are enabled:
+</p>
+<table class="opt">
+<tr class="opthead">
+<td class="flag_name">Flag</td>
+<td class="flag_level">-O1</td>
+<td class="flag_level">-O2</td>
+<td class="flag_level">-O3</td>
+<td class="flag_desc">&nbsp;</td>
+</tr>
+<tr class="odd separate">
+<td class="flag_name">fold</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Constant Folding, Simplifications and Reassociation</td></tr>
+<tr class="even">
+<td class="flag_name">cse</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Common-Subexpression Elimination</td></tr>
+<tr class="odd">
+<td class="flag_name">dce</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Dead-Code Elimination</td></tr>
+<tr class="even">
+<td class="flag_name">narrow</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Narrowing of numbers to integers</td></tr>
+<tr class="odd">
+<td class="flag_name">loop</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Loop Optimizations (code hoisting)</td></tr>
+<tr class="even">
+<td class="flag_name">fwd</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Load Forwarding (L2L) and Store Forwarding (S2L)</td></tr>
+<tr class="odd">
+<td class="flag_name">dse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Dead-Store Elimination</td></tr>
+<tr class="even">
+<td class="flag_name">abc</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Array Bounds Check Elimination</td></tr>
+<tr class="odd">
+<td class="flag_name">sink</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Allocation/Store Sinking</td></tr>
+<tr class="even">
+<td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr>
+</table>
+<p>
+Here are the parameters and their default settings:
+</p>
+<table class="opt">
+<tr class="opthead">
+<td class="param_name">Parameter</td>
+<td class="param_default">Default</td>
+<td class="param_desc">&nbsp;</td>
+</tr>
+<tr class="odd separate">
+<td class="param_name">maxtrace</td><td class="param_default">1000</td><td class="param_desc">Max. number of traces in the cache</td></tr>
+<tr class="even">
+<td class="param_name">maxrecord</td><td class="param_default">4000</td><td class="param_desc">Max. number of recorded IR instructions</td></tr>
+<tr class="odd">
+<td class="param_name">maxirconst</td><td class="param_default">500</td><td class="param_desc">Max. number of IR constants of a trace</td></tr>
+<tr class="even">
+<td class="param_name">maxside</td><td class="param_default">100</td><td class="param_desc">Max. number of side traces of a root trace</td></tr>
+<tr class="odd">
+<td class="param_name">maxsnap</td><td class="param_default">500</td><td class="param_desc">Max. number of snapshots for a trace</td></tr>
+<tr class="even separate">
+<td class="param_name">hotloop</td><td class="param_default">56</td><td class="param_desc">Number of iterations to detect a hot loop or hot call</td></tr>
+<tr class="odd">
+<td class="param_name">hotexit</td><td class="param_default">10</td><td class="param_desc">Number of taken exits to start a side trace</td></tr>
+<tr class="even">
+<td class="param_name">tryside</td><td class="param_default">4</td><td class="param_desc">Number of attempts to compile a side trace</td></tr>
+<tr class="odd separate">
+<td class="param_name">instunroll</td><td class="param_default">4</td><td class="param_desc">Max. unroll factor for instable loops</td></tr>
+<tr class="even">
+<td class="param_name">loopunroll</td><td class="param_default">15</td><td class="param_desc">Max. unroll factor for loop ops in side traces</td></tr>
+<tr class="odd">
+<td class="param_name">callunroll</td><td class="param_default">3</td><td class="param_desc">Max. unroll factor for pseudo-recursive calls</td></tr>
+<tr class="even">
+<td class="param_name">recunroll</td><td class="param_default">2</td><td class="param_desc">Min. unroll factor for true recursion</td></tr>
+<tr class="odd separate">
+<td class="param_name">sizemcode</td><td class="param_default">32</td><td class="param_desc">Size of each machine code area in KBytes (Windows: 64K)</td></tr>
+<tr class="even">
+<td class="param_name">maxmcode</td><td class="param_default">512</td><td class="param_desc">Max. total size of all machine code areas in KBytes</td></tr>
+</table>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 116 - 0
luajit.mod/luajit/doc/status.html

@@ -0,0 +1,116 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Status</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+ul li { padding-bottom: 0.3em; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Status</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li></ul>
+</li><li>
+<a class="current" href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+<span style="color: #0000c0;">LuaJIT&nbsp;2.0</span> is the current
+<span style="color: #0000c0;">stable branch</span>. This branch is in
+feature-freeze &mdash; new features will only be added to LuaJIT&nbsp;2.1.
+</p>
+
+<h2>Current Status</h2>
+<p>
+LuaJIT ought to run all Lua&nbsp;5.1-compatible source code just fine.
+It's considered a serious bug if the VM crashes or produces unexpected
+results &mdash; please report this.
+</p>
+<p>
+Known incompatibilities and issues in LuaJIT&nbsp;2.0:
+</p>
+<ul>
+<li>
+There are some differences in <b>implementation-defined</b> behavior.
+These either have a good reason, are arbitrary design choices
+or are due to quirks in the VM. The latter cases may get fixed if a
+demonstrable need is shown.
+</li>
+<li>
+The Lua <b>debug API</b> is missing a couple of features (return
+hooks for non-Lua functions) and shows slightly different behavior
+in LuaJIT (no per-coroutine hooks, no tail call counting).
+</li>
+<li>
+Some checks are missing in the JIT-compiled code for obscure situations
+with <b>open upvalues aliasing</b> one of the SSA slots later on (or
+vice versa). Bonus points, if you can find a real world test case for
+this.
+</li>
+<li>
+Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
+handled correctly. The error may fall through an on-trace
+<tt>pcall</tt> or it may be passed on to the function set with
+<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
+garbage collector.
+</li>
+</ul>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 456 - 0
luajit.mod/luajit/dynasm/dasm_arm.h

@@ -0,0 +1,456 @@
+/*
+** DynASM ARM encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"arm"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)
+{
+  int i;
+  for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30))
+    if (n <= 255) return (int)(n + (i << 8));
+  return -1;
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);
+    if (action >= DASM__MAX) {
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+      case DASM_IMM16:
+#ifdef DASM_CHECKS
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+	if ((ins & 0x8000))
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMMV8:
+	CK((n & 3) == 0, RANGE_I);
+	n >>= 2;
+      case DASM_IMML8:
+      case DASM_IMML12:
+	CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
+		    (((-n)>>((ins>>5)&31)) == 0), RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM12:
+	CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: case DASM_IMM12: case DASM_IMM16:
+	case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+	  goto patchrel;
+	case DASM_ALIGN:
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
+	patchrel:
+	  if ((ins & 0x800) == 0) {
+	    CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL);
+	    cp[-1] |= ((n >> 2) & 0x00ffffff);
+	  } else if ((ins & 0x1000)) {
+	    CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL);
+	    goto patchimml8;
+	  } else if ((ins & 0x2000) == 0) {
+	    CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL);
+	    goto patchimml;
+	  } else {
+	    CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL);
+	    n >>= 2;
+	    goto patchimml;
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	case DASM_IMM12:
+	  cp[-1] |= dasm_imm12((unsigned int)n);
+	  break;
+	case DASM_IMM16:
+	  cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff);
+	  break;
+	case DASM_IMML8: patchimml8:
+	  cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) :
+			     ((-n & 0x0f) | ((-n & 0xf0) << 4));
+	  break;
+	case DASM_IMML12: case DASM_IMMV8: patchimml:
+	  cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n);
+	  break;
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1125 - 0
luajit.mod/luajit/dynasm/dasm_arm.lua

@@ -0,0 +1,1125 @@
+------------------------------------------------------------------------------
+-- DynASM ARM module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+  arch =	"arm",
+  description =	"DynASM ARM module",
+  version =	"1.3.0",
+  vernum =	 10300,
+  release =	"2011-05-05",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(w * 0x10000 + (val or 0))
+  if a then actargs[#actargs+1] = a end
+  if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+  if n <= 0x000fffff then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = ""
+  return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  if n <= 0x000fffff then
+    insert(actlist, pos+1, n)
+    n = map_action.ESC * 0x10000
+  end
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n
+  return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("Global labels:\n")
+  for i=20,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("enum {\n")
+  for i=20,next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n
+  map_extern_[n] = name
+  return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0,next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0,next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+  return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, }
+
+local map_cond = {
+  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+  hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+-- Template strings for ARM instructions.
+local map_op = {
+  -- Basic data processing instructions.
+  and_3 = "e0000000DNPs",
+  eor_3 = "e0200000DNPs",
+  sub_3 = "e0400000DNPs",
+  rsb_3 = "e0600000DNPs",
+  add_3 = "e0800000DNPs",
+  adc_3 = "e0a00000DNPs",
+  sbc_3 = "e0c00000DNPs",
+  rsc_3 = "e0e00000DNPs",
+  tst_2 = "e1100000NP",
+  teq_2 = "e1300000NP",
+  cmp_2 = "e1500000NP",
+  cmn_2 = "e1700000NP",
+  orr_3 = "e1800000DNPs",
+  mov_2 = "e1a00000DPs",
+  bic_3 = "e1c00000DNPs",
+  mvn_2 = "e1e00000DPs",
+
+  and_4 = "e0000000DNMps",
+  eor_4 = "e0200000DNMps",
+  sub_4 = "e0400000DNMps",
+  rsb_4 = "e0600000DNMps",
+  add_4 = "e0800000DNMps",
+  adc_4 = "e0a00000DNMps",
+  sbc_4 = "e0c00000DNMps",
+  rsc_4 = "e0e00000DNMps",
+  tst_3 = "e1100000NMp",
+  teq_3 = "e1300000NMp",
+  cmp_3 = "e1500000NMp",
+  cmn_3 = "e1700000NMp",
+  orr_4 = "e1800000DNMps",
+  mov_3 = "e1a00000DMps",
+  bic_4 = "e1c00000DNMps",
+  mvn_3 = "e1e00000DMps",
+
+  lsl_3 = "e1a00000DMws",
+  lsr_3 = "e1a00020DMws",
+  asr_3 = "e1a00040DMws",
+  ror_3 = "e1a00060DMws",
+  rrx_2 = "e1a00060DMs",
+
+  -- Multiply and multiply-accumulate.
+  mul_3 = "e0000090NMSs",
+  mla_4 = "e0200090NMSDs",
+  umaal_4 = "e0400090DNMSs",	-- v6
+  mls_4 = "e0600090DNMSs",	-- v6T2
+  umull_4 = "e0800090DNMSs",
+  umlal_4 = "e0a00090DNMSs",
+  smull_4 = "e0c00090DNMSs",
+  smlal_4 = "e0e00090DNMSs",
+
+  -- Halfword multiply and multiply-accumulate.
+  smlabb_4 = "e1000080NMSD",	-- v5TE
+  smlatb_4 = "e10000a0NMSD",	-- v5TE
+  smlabt_4 = "e10000c0NMSD",	-- v5TE
+  smlatt_4 = "e10000e0NMSD",	-- v5TE
+  smlawb_4 = "e1200080NMSD",	-- v5TE
+  smulwb_3 = "e12000a0NMS",	-- v5TE
+  smlawt_4 = "e12000c0NMSD",	-- v5TE
+  smulwt_3 = "e12000e0NMS",	-- v5TE
+  smlalbb_4 = "e1400080NMSD",	-- v5TE
+  smlaltb_4 = "e14000a0NMSD",	-- v5TE
+  smlalbt_4 = "e14000c0NMSD",	-- v5TE
+  smlaltt_4 = "e14000e0NMSD",	-- v5TE
+  smulbb_3 = "e1600080NMS",	-- v5TE
+  smultb_3 = "e16000a0NMS",	-- v5TE
+  smulbt_3 = "e16000c0NMS",	-- v5TE
+  smultt_3 = "e16000e0NMS",	-- v5TE
+
+  -- Miscellaneous data processing instructions.
+  clz_2 = "e16f0f10DM", -- v5T
+  rev_2 = "e6bf0f30DM", -- v6
+  rev16_2 = "e6bf0fb0DM", -- v6
+  revsh_2 = "e6ff0fb0DM", -- v6
+  sel_3 = "e6800fb0DNM", -- v6
+  usad8_3 = "e780f010NMS", -- v6
+  usada8_4 = "e7800010NMSD", -- v6
+  rbit_2 = "e6ff0f30DM", -- v6T2
+  movw_2 = "e3000000DW", -- v6T2
+  movt_2 = "e3400000DW", -- v6T2
+  -- Note: the X encodes width-1, not width.
+  sbfx_4 = "e7a00050DMvX", -- v6T2
+  ubfx_4 = "e7e00050DMvX", -- v6T2
+  -- Note: the X encodes the msb field, not the width.
+  bfc_3 = "e7c0001fDvX", -- v6T2
+  bfi_4 = "e7c00010DMvX", -- v6T2
+
+  -- Packing and unpacking instructions.
+  pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6
+  pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6
+  sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6
+  sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6
+  sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6
+  sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6
+  sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6
+  sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6
+  uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6
+  uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6
+  uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6
+  uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6
+  uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6
+  uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6
+
+  -- Saturating instructions.
+  qadd_3 = "e1000050DMN",	-- v5TE
+  qsub_3 = "e1200050DMN",	-- v5TE
+  qdadd_3 = "e1400050DMN",	-- v5TE
+  qdsub_3 = "e1600050DMN",	-- v5TE
+  -- Note: the X for ssat* encodes sat_imm-1, not sat_imm.
+  ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6
+  usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6
+  ssat16_3 = "e6a00f30DXM", -- v6
+  usat16_3 = "e6e00f30DXM", -- v6
+
+  -- Parallel addition and subtraction.
+  sadd16_3 = "e6100f10DNM", -- v6
+  sasx_3 = "e6100f30DNM", -- v6
+  ssax_3 = "e6100f50DNM", -- v6
+  ssub16_3 = "e6100f70DNM", -- v6
+  sadd8_3 = "e6100f90DNM", -- v6
+  ssub8_3 = "e6100ff0DNM", -- v6
+  qadd16_3 = "e6200f10DNM", -- v6
+  qasx_3 = "e6200f30DNM", -- v6
+  qsax_3 = "e6200f50DNM", -- v6
+  qsub16_3 = "e6200f70DNM", -- v6
+  qadd8_3 = "e6200f90DNM", -- v6
+  qsub8_3 = "e6200ff0DNM", -- v6
+  shadd16_3 = "e6300f10DNM", -- v6
+  shasx_3 = "e6300f30DNM", -- v6
+  shsax_3 = "e6300f50DNM", -- v6
+  shsub16_3 = "e6300f70DNM", -- v6
+  shadd8_3 = "e6300f90DNM", -- v6
+  shsub8_3 = "e6300ff0DNM", -- v6
+  uadd16_3 = "e6500f10DNM", -- v6
+  uasx_3 = "e6500f30DNM", -- v6
+  usax_3 = "e6500f50DNM", -- v6
+  usub16_3 = "e6500f70DNM", -- v6
+  uadd8_3 = "e6500f90DNM", -- v6
+  usub8_3 = "e6500ff0DNM", -- v6
+  uqadd16_3 = "e6600f10DNM", -- v6
+  uqasx_3 = "e6600f30DNM", -- v6
+  uqsax_3 = "e6600f50DNM", -- v6
+  uqsub16_3 = "e6600f70DNM", -- v6
+  uqadd8_3 = "e6600f90DNM", -- v6
+  uqsub8_3 = "e6600ff0DNM", -- v6
+  uhadd16_3 = "e6700f10DNM", -- v6
+  uhasx_3 = "e6700f30DNM", -- v6
+  uhsax_3 = "e6700f50DNM", -- v6
+  uhsub16_3 = "e6700f70DNM", -- v6
+  uhadd8_3 = "e6700f90DNM", -- v6
+  uhsub8_3 = "e6700ff0DNM", -- v6
+
+  -- Load/store instructions.
+  str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL",
+  strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL",
+  ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL",
+  ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL",
+  strh_2 = "e00000b0DL", strh_3 = "e00000b0DL",
+  ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL",
+  ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE
+  ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL",
+  strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE
+  ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL",
+
+  ldm_2 = "e8900000oR", ldmia_2 = "e8900000oR", ldmfd_2 = "e8900000oR",
+  ldmda_2 = "e8100000oR", ldmfa_2 = "e8100000oR",
+  ldmdb_2 = "e9100000oR", ldmea_2 = "e9100000oR",
+  ldmib_2 = "e9900000oR", ldmed_2 = "e9900000oR",
+  stm_2 = "e8800000oR", stmia_2 = "e8800000oR", stmfd_2 = "e8800000oR",
+  stmda_2 = "e8000000oR", stmfa_2 = "e8000000oR",
+  stmdb_2 = "e9000000oR", stmea_2 = "e9000000oR",
+  stmib_2 = "e9800000oR", stmed_2 = "e9800000oR",
+  pop_1 = "e8bd0000R", push_1 = "e92d0000R",
+
+  -- Branch instructions.
+  b_1 = "ea000000B",
+  bl_1 = "eb000000B",
+  blx_1 = "e12fff30C",
+  bx_1 = "e12fff10M",
+
+  -- Miscellaneous instructions.
+  nop_0 = "e1a00000",
+  mrs_1 = "e10f0000D",
+  bkpt_1 = "e1200070K", -- v5T
+  svc_1 = "ef000000T", swi_1 = "ef000000T",
+  ud_0 = "e7f001f0",
+
+  -- VFP instructions.
+  ["vadd.f32_3"] = "ee300a00dnm",
+  ["vadd.f64_3"] = "ee300b00Gdnm",
+  ["vsub.f32_3"] = "ee300a40dnm",
+  ["vsub.f64_3"] = "ee300b40Gdnm",
+  ["vmul.f32_3"] = "ee200a00dnm",
+  ["vmul.f64_3"] = "ee200b00Gdnm",
+  ["vnmul.f32_3"] = "ee200a40dnm",
+  ["vnmul.f64_3"] = "ee200b40Gdnm",
+  ["vmla.f32_3"] = "ee000a00dnm",
+  ["vmla.f64_3"] = "ee000b00Gdnm",
+  ["vmls.f32_3"] = "ee000a40dnm",
+  ["vmls.f64_3"] = "ee000b40Gdnm",
+  ["vnmla.f32_3"] = "ee100a40dnm",
+  ["vnmla.f64_3"] = "ee100b40Gdnm",
+  ["vnmls.f32_3"] = "ee100a00dnm",
+  ["vnmls.f64_3"] = "ee100b00Gdnm",
+  ["vdiv.f32_3"] = "ee800a00dnm",
+  ["vdiv.f64_3"] = "ee800b00Gdnm",
+
+  ["vabs.f32_2"] = "eeb00ac0dm",
+  ["vabs.f64_2"] = "eeb00bc0Gdm",
+  ["vneg.f32_2"] = "eeb10a40dm",
+  ["vneg.f64_2"] = "eeb10b40Gdm",
+  ["vsqrt.f32_2"] = "eeb10ac0dm",
+  ["vsqrt.f64_2"] = "eeb10bc0Gdm",
+  ["vcmp.f32_2"] = "eeb40a40dm",
+  ["vcmp.f64_2"] = "eeb40b40Gdm",
+  ["vcmpe.f32_2"] = "eeb40ac0dm",
+  ["vcmpe.f64_2"] = "eeb40bc0Gdm",
+  ["vcmpz.f32_1"] = "eeb50a40d",
+  ["vcmpz.f64_1"] = "eeb50b40Gd",
+  ["vcmpze.f32_1"] = "eeb50ac0d",
+  ["vcmpze.f64_1"] = "eeb50bc0Gd",
+
+  vldr_2 = "ed100a00dl|ed100b00Gdl",
+  vstr_2 = "ed000a00dl|ed000b00Gdl",
+  vldm_2 = "ec900a00or",
+  vldmia_2 = "ec900a00or",
+  vldmdb_2 = "ed100a00or",
+  vpop_1 = "ecbd0a00r",
+  vstm_2 = "ec800a00or",
+  vstmia_2 = "ec800a00or",
+  vstmdb_2 = "ed000a00or",
+  vpush_1 = "ed2d0a00r",
+
+  ["vmov.f32_2"] = "eeb00a40dm|eeb00a00dY",	-- #imm is VFPv3 only
+  ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY",	-- #imm is VFPv3 only
+  vmov_2 = "ee100a10Dn|ee000a10nD",
+  vmov_3 = "ec500a10DNm|ec400a10mDN|ec500b10GDNm|ec400b10GmDN",
+
+  vmrs_0 = "eef1fa10",
+  vmrs_1 = "eef10a10D",
+  vmsr_1 = "eee10a10D",
+
+  ["vcvt.s32.f32_2"] = "eebd0ac0dm",
+  ["vcvt.s32.f64_2"] = "eebd0bc0dGm",
+  ["vcvt.u32.f32_2"] = "eebc0ac0dm",
+  ["vcvt.u32.f64_2"] = "eebc0bc0dGm",
+  ["vcvtr.s32.f32_2"] = "eebd0a40dm",
+  ["vcvtr.s32.f64_2"] = "eebd0b40dGm",
+  ["vcvtr.u32.f32_2"] = "eebc0a40dm",
+  ["vcvtr.u32.f64_2"] = "eebc0b40dGm",
+  ["vcvt.f32.s32_2"] = "eeb80ac0dm",
+  ["vcvt.f64.s32_2"] = "eeb80bc0GdFm",
+  ["vcvt.f32.u32_2"] = "eeb80a40dm",
+  ["vcvt.f64.u32_2"] = "eeb80b40GdFm",
+  ["vcvt.f32.f64_2"] = "eeb70bc0dGm",
+  ["vcvt.f64.f32_2"] = "eeb70ac0GdFm",
+
+  -- VFPv4 only:
+  ["vfma.f32_3"] = "eea00a00dnm",
+  ["vfma.f64_3"] = "eea00b00Gdnm",
+  ["vfms.f32_3"] = "eea00a40dnm",
+  ["vfms.f64_3"] = "eea00b40Gdnm",
+  ["vfnma.f32_3"] = "ee900a40dnm",
+  ["vfnma.f64_3"] = "ee900b40Gdnm",
+  ["vfnms.f32_3"] = "ee900a00dnm",
+  ["vfnms.f64_3"] = "ee900b00Gdnm",
+
+  -- NYI: Advanced SIMD instructions.
+
+  -- NYI: I have no need for these instructions right now:
+  -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh
+  -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe
+  -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb
+  -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2
+}
+
+-- Add mnemonics for "s" variants.
+do
+  local t = {}
+  for k,v in pairs(map_op) do
+    if sub(v, -1) == "s" then
+      local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2)
+      t[sub(k, 1, -3).."s"..sub(k, -2)] = v2
+    end
+  end
+  for k,v in pairs(t) do
+    map_op[k] = v
+  end
+end
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr)
+  local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local r = match(expr, "^r(1?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 15 then return r, tp end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_gpr_pm(expr)
+  local pm, expr2 = match(expr, "^([+-]?)(.*)$")
+  return parse_gpr(expr2), (pm == "-")
+end
+
+local function parse_vr(expr, tp)
+  local t, r = match(expr, "^([sd])([0-9]+)$")
+  if t == tp then
+    r = tonumber(r)
+    if r <= 31 then
+      if t == "s" then return shr(r, 1), band(r, 1) end
+      return band(r, 15), shr(r, 4)
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_reglist(reglist)
+  reglist = match(reglist, "^{%s*([^}]*)}$")
+  if not reglist then werror("register list expected") end
+  local rr = 0
+  for p in gmatch(reglist..",", "%s*([^,]*),") do
+    local rbit = shl(1, parse_gpr(gsub(p, "%s+$", "")))
+    if band(rr, rbit) ~= 0 then
+      werror("duplicate register `"..p.."'")
+    end
+    rr = rr + rbit
+  end
+  return rr
+end
+
+local function parse_vrlist(reglist)
+  local ta, ra, tb, rb = match(reglist,
+			   "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$")
+  ra, rb = tonumber(ra), tonumber(rb)
+  if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then
+    local nr = rb+1 - ra
+    if ta == "s" then
+      return shl(shr(ra,1),12)+shl(band(ra,1),22) + nr
+    else
+      return shl(band(ra,15),12)+shl(shr(ra,4),22) + nr*2 + 0x100
+    end
+  end
+  werror("register list expected")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = tonumber(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n then
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+local function parse_imm12(imm)
+  local n = tonumber(imm)
+  if n then
+    local m = band(n)
+    for i=0,-15,-1 do
+      if shr(m, 8) == 0 then return m + shl(band(i, 15), 8) end
+      m = ror(m, 2)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM12", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm16(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = tonumber(imm)
+  if n then
+    if shr(n, 16) == 0 then return band(n, 0x0fff) + shl(band(n, 0xf000), 4) end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM16", 32*16, imm)
+    return 0
+  end
+end
+
+local function parse_imm_load(imm, ext)
+  local n = tonumber(imm)
+  if n then
+    if ext then
+      if n >= -255 and n <= 255 then
+	local up = 0x00800000
+	if n < 0 then n = -n; up = 0 end
+	return shl(band(n, 0xf0), 4) + band(n, 0x0f) + up
+      end
+    else
+      if n >= -4095 and n <= 4095 then
+	if n >= 0 then return n+0x00800000 end
+	return -n
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction(ext and "IMML8" or "IMML12", 32768 + shl(ext and 8 or 12, 5), imm)
+    return 0
+  end
+end
+
+local function parse_shift(shift, gprok)
+  if shift == "rrx" then
+    return 3 * 32
+  else
+    local s, s2 = match(shift, "^(%S+)%s*(.*)$")
+    s = map_shift[s]
+    if not s then werror("expected shift operand") end
+    if sub(s2, 1, 1) == "#" then
+      return parse_imm(s2, 5, 7, 0, false) + shl(s, 5)
+    else
+      if not gprok then werror("expected immediate shift operand") end
+      return shl(parse_gpr(s2), 8) + shl(s, 5) + 16
+    end
+  end
+end
+
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+local function parse_load(params, nparams, n, op)
+  local oplo = band(op, 255)
+  local ext, ldrd = (oplo ~= 0), (oplo == 208)
+  local d
+  if (ldrd or oplo == 240) then
+    d = band(shr(op, 12), 15)
+    if band(d, 1) ~= 0 then werror("odd destination register") end
+  end
+  local pn = params[n]
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  local p2 = params[n+1]
+  if not p1 then
+    if not p2 then
+      if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then
+	local mode, n, s = parse_label(pn, false)
+	waction("REL_"..mode, n + (ext and 0x1800 or 0x0800), s, 1)
+	return op + 15 * 65536 + 0x01000000 + (ext and 0x00400000 or 0)
+      end
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local d, tp = parse_gpr(reg)
+	if tp then
+	  waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12),
+		  format(tp.ctypefmt, tailr))
+	  return op + shl(d, 16) + 0x01000000 + (ext and 0x00400000 or 0)
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  if wb == "!" then op = op + 0x00200000 end
+  if p2 then
+    if wb == "!" then werror("bad use of '!'") end
+    local p3 = params[n+2]
+    op = op + shl(parse_gpr(p1), 16)
+    local imm = match(p2, "^#(.*)$")
+    if imm then
+      local m = parse_imm_load(imm, ext)
+      if p3 then werror("too many parameters") end
+      op = op + m + (ext and 0x00400000 or 0)
+    else
+      local m, neg = parse_gpr_pm(p2)
+      if ldrd and (m == d or m-1 == d) then werror("register conflict") end
+      op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
+      if p3 then op = op + parse_shift(p3) end
+    end
+  else
+    local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + shl(parse_gpr(p1a), 16) + 0x01000000
+    if p2 ~= "" then
+      local imm = match(p2, "^,%s*#(.*)$")
+      if imm then
+	local m = parse_imm_load(imm, ext)
+	op = op + m + (ext and 0x00400000 or 0)
+      else
+	local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
+	local m, neg = parse_gpr_pm(p2a)
+	if ldrd and (m == d or m-1 == d) then werror("register conflict") end
+	op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
+	if p3 ~= "" then
+	  if ext then werror("too many parameters") end
+	  op = op + parse_shift(p3)
+	end
+      end
+    else
+      if wb == "!" then werror("bad use of '!'") end
+      op = op + (ext and 0x00c00000 or 0x00800000)
+    end
+  end
+  return op
+end
+
+local function parse_vload(q)
+  local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$")
+  if reg then
+    local d = shl(parse_gpr(reg), 16)
+    if imm == "" then return d end
+    imm = match(imm, "^,%s*#(.*)$")
+    if imm then
+      local n = tonumber(imm)
+      if n then
+	if n >= -1020 and n <= 1020 and n%4 == 0 then
+	  return d + (n >= 0 and n/4+0x00800000 or -n/4)
+	end
+	werror("out of range immediate `"..imm.."'")
+      else
+	waction("IMMV8", 32768 + 32*8, imm)
+	return d
+      end
+    end
+  else
+    if match(q, "^[<>=%-]") or match(q, "^extern%s+") then
+      local mode, n, s = parse_label(q, false)
+      waction("REL_"..mode, n + 0x2800, s, 1)
+      return 15 * 65536
+    end
+    local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$")
+    if reg and tailr ~= "" then
+      local d, tp = parse_gpr(reg)
+      if tp then
+	waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr))
+	return shl(d, 16)
+      end
+    end
+  end
+  werror("expected address operand")
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+local function parse_template(params, template, nparams, pos)
+  local op = tonumber(sub(template, 1, 8), 16)
+  local n = 1
+  local vr = "s"
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    local q = params[n]
+    if p == "D" then
+      op = op + shl(parse_gpr(q), 12); n = n + 1
+    elseif p == "N" then
+      op = op + shl(parse_gpr(q), 16); n = n + 1
+    elseif p == "S" then
+      op = op + shl(parse_gpr(q), 8); n = n + 1
+    elseif p == "M" then
+      op = op + parse_gpr(q); n = n + 1
+    elseif p == "d" then
+      local r,h = parse_vr(q, vr); op = op+shl(r,12)+shl(h,22); n = n + 1
+    elseif p == "n" then
+      local r,h = parse_vr(q, vr); op = op+shl(r,16)+shl(h,7); n = n + 1
+    elseif p == "m" then
+      local r,h = parse_vr(q, vr); op = op+r+shl(h,5); n = n + 1
+    elseif p == "P" then
+      local imm = match(q, "^#(.*)$")
+      if imm then
+	op = op + parse_imm12(imm) + 0x02000000
+      else
+	op = op + parse_gpr(q)
+      end
+      n = n + 1
+    elseif p == "p" then
+      op = op + parse_shift(q, true); n = n + 1
+    elseif p == "L" then
+      op = parse_load(params, nparams, n, op)
+    elseif p == "l" then
+      op = op + parse_vload(q)
+    elseif p == "B" then
+      local mode, n, s = parse_label(q, false)
+      waction("REL_"..mode, n, s, 1)
+    elseif p == "C" then -- blx gpr vs. blx label.
+      if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then
+	op = op + parse_gpr(q)
+      else
+	if op < 0xe0000000 then werror("unconditional instruction") end
+	local mode, n, s = parse_label(q, false)
+	waction("REL_"..mode, n, s, 1)
+	op = 0xfa000000
+      end
+    elseif p == "F" then
+      vr = "s"
+    elseif p == "G" then
+      vr = "d"
+    elseif p == "o" then
+      local r, wb = match(q, "^([^!]*)(!?)$")
+      op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0)
+      n = n + 1
+    elseif p == "R" then
+      op = op + parse_reglist(q); n = n + 1
+    elseif p == "r" then
+      op = op + parse_vrlist(q); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm16(q); n = n + 1
+    elseif p == "v" then
+      op = op + parse_imm(q, 5, 7, 0, false); n = n + 1
+    elseif p == "w" then
+      local imm = match(q, "^#(.*)$")
+      if imm then
+	op = op + parse_imm(q, 5, 7, 0, false); n = n + 1
+      else
+	op = op + shl(parse_gpr(q), 8) + 16
+      end
+    elseif p == "X" then
+      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+    elseif p == "Y" then
+      local imm = tonumber(match(q, "^#(.*)$")); n = n + 1
+      if not imm or shr(imm, 8) ~= 0 then
+	werror("bad immediate operand")
+      end
+      op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f)
+    elseif p == "K" then
+      local imm = tonumber(match(q, "^#(.*)$")); n = n + 1
+      if not imm or shr(imm, 16) ~= 0 then
+	werror("bad immediate operand")
+      end
+      op = op + shl(band(imm, 0xfff0), 4) + band(imm, 0x000f)
+    elseif p == "T" then
+      op = op + parse_imm(q, 24, 0, 0, false); n = n + 1
+    elseif p == "s" then
+      -- Ignored.
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions.
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+  local lpos, apos, spos = #actlist, #actargs, secpos
+
+  local ok, err
+  for t in gmatch(template, "[^|]+") do
+    ok, err = pcall(parse_template, params, t, nparams, pos)
+    if ok then return end
+    secpos = spos
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
+    actargs[apos+1] = nil
+    actargs[apos+2] = nil
+    actargs[apos+3] = nil
+  end
+  error(err, 0)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = function(t, k)
+    local v = map_coreop[k]
+    if v then return v end
+    local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$")
+    local cv = map_cond[cc]
+    if cv then
+      local v = rawget(t, k1..k2)
+      if type(v) == "string" then
+	local scv = format("%x", cv)
+	return gsub(scv..sub(v, 2), "|e", "|"..scv)
+      end
+    end
+  end })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 416 - 0
luajit.mod/luajit/dynasm/dasm_mips.h

@@ -0,0 +1,416 @@
+/*
+** DynASM MIPS encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"mips"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16) - 0xff00;
+    if (action >= DASM__MAX) {
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+#ifdef DASM_CHECKS
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+#endif
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if (ins & 0x8000)
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16) - 0xff00;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: pos++; break;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16) - 0xff00;
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
+	  goto patchrel;
+	case DASM_ALIGN:
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n);
+	  if (ins & 2048)
+	    n = n - (int)((char *)cp - base);
+	  else
+	    n = (n + (int)base) & 0x0fffffff;
+	patchrel:
+	  CK((n & 3) == 0 &&
+	     ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
+	       ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL);
+	  cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff));
+	  break;
+	case DASM_LABEL_LG:
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 953 - 0
luajit.mod/luajit/dynasm/dasm_mips.lua

@@ -0,0 +1,953 @@
+------------------------------------------------------------------------------
+-- DynASM MIPS module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+  arch =	"mips",
+  description =	"DynASM MIPS module",
+  version =	"1.3.0",
+  vernum =	 10300,
+  release =	"2012-01-23",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable = assert, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch = _s.match, _s.gmatch
+local concat, sort = table.concat, table.sort
+local bit = bit or require("bit")
+local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(0xff000000 + w * 0x10000 + (val or 0))
+  if a then actargs[#actargs+1] = a end
+  if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+  if n >= 0xff000000 then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = ""
+  return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n
+  return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("Global labels:\n")
+  for i=20,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("enum {\n")
+  for i=20,next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n
+  map_extern_[n] = name
+  return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0,next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0,next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = { sp="r29", ra="r31" } -- Ext. register name -> int. name.
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+  if s == "r29" then return "sp"
+  elseif s == "r31" then return "ra" end
+  return s
+end
+
+------------------------------------------------------------------------------
+
+-- Template strings for MIPS instructions.
+local map_op = {
+  -- First-level opcodes.
+  j_1 =		"08000000J",
+  jal_1 =	"0c000000J",
+  b_1 =		"10000000B",
+  beqz_2 =	"10000000SB",
+  beq_3 =	"10000000STB",
+  bnez_2 =	"14000000SB",
+  bne_3 =	"14000000STB",
+  blez_2 =	"18000000SB",
+  bgtz_2 =	"1c000000SB",
+  addi_3 =	"20000000TSI",
+  li_2 =	"24000000TI",
+  addiu_3 =	"24000000TSI",
+  slti_3 =	"28000000TSI",
+  sltiu_3 =	"2c000000TSI",
+  andi_3 =	"30000000TSU",
+  lu_2 =	"34000000TU",
+  ori_3 =	"34000000TSU",
+  xori_3 =	"38000000TSU",
+  lui_2 =	"3c000000TU",
+  beqzl_2 =	"50000000SB",
+  beql_3 =	"50000000STB",
+  bnezl_2 =	"54000000SB",
+  bnel_3 =	"54000000STB",
+  blezl_2 =	"58000000SB",
+  bgtzl_2 =	"5c000000SB",
+  lb_2 =	"80000000TO",
+  lh_2 =	"84000000TO",
+  lwl_2 =	"88000000TO",
+  lw_2 =	"8c000000TO",
+  lbu_2 =	"90000000TO",
+  lhu_2 =	"94000000TO",
+  lwr_2 =	"98000000TO",
+  sb_2 =	"a0000000TO",
+  sh_2 =	"a4000000TO",
+  swl_2 =	"a8000000TO",
+  sw_2 =	"ac000000TO",
+  swr_2 =	"b8000000TO",
+  cache_2 =	"bc000000NO",
+  ll_2 =	"c0000000TO",
+  lwc1_2 =	"c4000000HO",
+  pref_2 =	"cc000000NO",
+  ldc1_2 =	"d4000000HO",
+  sc_2 =	"e0000000TO",
+  swc1_2 =	"e4000000HO",
+  sdc1_2 =	"f4000000HO",
+
+  -- Opcode SPECIAL.
+  nop_0 =	"00000000",
+  sll_3 =	"00000000DTA",
+  movf_2 =	"00000001DS",
+  movf_3 =	"00000001DSC",
+  movt_2 =	"00010001DS",
+  movt_3 =	"00010001DSC",
+  srl_3 =	"00000002DTA",
+  rotr_3 =	"00200002DTA",
+  sra_3 =	"00000003DTA",
+  sllv_3 =	"00000004DTS",
+  srlv_3 =	"00000006DTS",
+  rotrv_3 =	"00000046DTS",
+  srav_3 =	"00000007DTS",
+  jr_1 =	"00000008S",
+  jalr_1 =	"0000f809S",
+  jalr_2 =	"00000009DS",
+  movz_3 =	"0000000aDST",
+  movn_3 =	"0000000bDST",
+  syscall_0 =	"0000000c",
+  syscall_1 =	"0000000cY",
+  break_0 =	"0000000d",
+  break_1 =	"0000000dY",
+  sync_0 =	"0000000f",
+  mfhi_1 =	"00000010D",
+  mthi_1 =	"00000011S",
+  mflo_1 =	"00000012D",
+  mtlo_1 =	"00000013S",
+  mult_2 =	"00000018ST",
+  multu_2 =	"00000019ST",
+  div_2 =	"0000001aST",
+  divu_2 =	"0000001bST",
+  add_3 =	"00000020DST",
+  move_2 =	"00000021DS",
+  addu_3 =	"00000021DST",
+  sub_3 =	"00000022DST",
+  negu_2 =	"00000023DT",
+  subu_3 =	"00000023DST",
+  and_3 =	"00000024DST",
+  or_3 =	"00000025DST",
+  xor_3 =	"00000026DST",
+  not_2 =	"00000027DS",
+  nor_3 =	"00000027DST",
+  slt_3 =	"0000002aDST",
+  sltu_3 =	"0000002bDST",
+  tge_2 =	"00000030ST",
+  tge_3 =	"00000030STZ",
+  tgeu_2 =	"00000031ST",
+  tgeu_3 =	"00000031STZ",
+  tlt_2 =	"00000032ST",
+  tlt_3 =	"00000032STZ",
+  tltu_2 =	"00000033ST",
+  tltu_3 =	"00000033STZ",
+  teq_2 =	"00000034ST",
+  teq_3 =	"00000034STZ",
+  tne_2 =	"00000036ST",
+  tne_3 =	"00000036STZ",
+
+  -- Opcode REGIMM.
+  bltz_2 =	"04000000SB",
+  bgez_2 =	"04010000SB",
+  bltzl_2 =	"04020000SB",
+  bgezl_2 =	"04030000SB",
+  tgei_2 =	"04080000SI",
+  tgeiu_2 =	"04090000SI",
+  tlti_2 =	"040a0000SI",
+  tltiu_2 =	"040b0000SI",
+  teqi_2 =	"040c0000SI",
+  tnei_2 =	"040e0000SI",
+  bltzal_2 =	"04100000SB",
+  bal_1 =	"04110000B",
+  bgezal_2 =	"04110000SB",
+  bltzall_2 =	"04120000SB",
+  bgezall_2 =	"04130000SB",
+  synci_1 =	"041f0000O",
+
+  -- Opcode SPECIAL2.
+  madd_2 =	"70000000ST",
+  maddu_2 =	"70000001ST",
+  mul_3 =	"70000002DST",
+  msub_2 =	"70000004ST",
+  msubu_2 =	"70000005ST",
+  clz_2 =	"70000020DS=",
+  clo_2 =	"70000021DS=",
+  sdbbp_0 =	"7000003f",
+  sdbbp_1 =	"7000003fY",
+
+  -- Opcode SPECIAL3.
+  ext_4 =	"7c000000TSAM", -- Note: last arg is msbd = size-1
+  ins_4 =	"7c000004TSAM", -- Note: last arg is msb = pos+size-1
+  wsbh_2 =	"7c0000a0DT",
+  seb_2 =	"7c000420DT",
+  seh_2 =	"7c000620DT",
+  rdhwr_2 =	"7c00003bTD",
+
+  -- Opcode COP0.
+  mfc0_2 =	"40000000TD",
+  mfc0_3 =	"40000000TDW",
+  mtc0_2 =	"40800000TD",
+  mtc0_3 =	"40800000TDW",
+  rdpgpr_2 =	"41400000DT",
+  di_0 =	"41606000",
+  di_1 =	"41606000T",
+  ei_0 =	"41606020",
+  ei_1 =	"41606020T",
+  wrpgpr_2 =	"41c00000DT",
+  tlbr_0 =	"42000001",
+  tlbwi_0 =	"42000002",
+  tlbwr_0 =	"42000006",
+  tlbp_0 =	"42000008",
+  eret_0 =	"42000018",
+  deret_0 =	"4200001f",
+  wait_0 =	"42000020",
+
+  -- Opcode COP1.
+  mfc1_2 =	"44000000TG",
+  cfc1_2 =	"44400000TG",
+  mfhc1_2 =	"44600000TG",
+  mtc1_2 =	"44800000TG",
+  ctc1_2 =	"44c00000TG",
+  mthc1_2 =	"44e00000TG",
+
+  bc1f_1 =	"45000000B",
+  bc1f_2 =	"45000000CB",
+  bc1t_1 =	"45010000B",
+  bc1t_2 =	"45010000CB",
+  bc1fl_1 =	"45020000B",
+  bc1fl_2 =	"45020000CB",
+  bc1tl_1 =	"45030000B",
+  bc1tl_2 =	"45030000CB",
+
+  ["add.s_3"] =		"46000000FGH",
+  ["sub.s_3"] =		"46000001FGH",
+  ["mul.s_3"] =		"46000002FGH",
+  ["div.s_3"] =		"46000003FGH",
+  ["sqrt.s_2"] =	"46000004FG",
+  ["abs.s_2"] =		"46000005FG",
+  ["mov.s_2"] =		"46000006FG",
+  ["neg.s_2"] =		"46000007FG",
+  ["round.l.s_2"] =	"46000008FG",
+  ["trunc.l.s_2"] =	"46000009FG",
+  ["ceil.l.s_2"] =	"4600000aFG",
+  ["floor.l.s_2"] =	"4600000bFG",
+  ["round.w.s_2"] =	"4600000cFG",
+  ["trunc.w.s_2"] =	"4600000dFG",
+  ["ceil.w.s_2"] =	"4600000eFG",
+  ["floor.w.s_2"] =	"4600000fFG",
+  ["movf.s_2"] =	"46000011FG",
+  ["movf.s_3"] =	"46000011FGC",
+  ["movt.s_2"] =	"46010011FG",
+  ["movt.s_3"] =	"46010011FGC",
+  ["movz.s_3"] =	"46000012FGT",
+  ["movn.s_3"] =	"46000013FGT",
+  ["recip.s_2"] =	"46000015FG",
+  ["rsqrt.s_2"] =	"46000016FG",
+  ["cvt.d.s_2"] =	"46000021FG",
+  ["cvt.w.s_2"] =	"46000024FG",
+  ["cvt.l.s_2"] =	"46000025FG",
+  ["cvt.ps.s_3"] =	"46000026FGH",
+  ["c.f.s_2"] =		"46000030GH",
+  ["c.f.s_3"] =		"46000030VGH",
+  ["c.un.s_2"] =	"46000031GH",
+  ["c.un.s_3"] =	"46000031VGH",
+  ["c.eq.s_2"] =	"46000032GH",
+  ["c.eq.s_3"] =	"46000032VGH",
+  ["c.ueq.s_2"] =	"46000033GH",
+  ["c.ueq.s_3"] =	"46000033VGH",
+  ["c.olt.s_2"] =	"46000034GH",
+  ["c.olt.s_3"] =	"46000034VGH",
+  ["c.ult.s_2"] =	"46000035GH",
+  ["c.ult.s_3"] =	"46000035VGH",
+  ["c.ole.s_2"] =	"46000036GH",
+  ["c.ole.s_3"] =	"46000036VGH",
+  ["c.ule.s_2"] =	"46000037GH",
+  ["c.ule.s_3"] =	"46000037VGH",
+  ["c.sf.s_2"] =	"46000038GH",
+  ["c.sf.s_3"] =	"46000038VGH",
+  ["c.ngle.s_2"] =	"46000039GH",
+  ["c.ngle.s_3"] =	"46000039VGH",
+  ["c.seq.s_2"] =	"4600003aGH",
+  ["c.seq.s_3"] =	"4600003aVGH",
+  ["c.ngl.s_2"] =	"4600003bGH",
+  ["c.ngl.s_3"] =	"4600003bVGH",
+  ["c.lt.s_2"] =	"4600003cGH",
+  ["c.lt.s_3"] =	"4600003cVGH",
+  ["c.nge.s_2"] =	"4600003dGH",
+  ["c.nge.s_3"] =	"4600003dVGH",
+  ["c.le.s_2"] =	"4600003eGH",
+  ["c.le.s_3"] =	"4600003eVGH",
+  ["c.ngt.s_2"] =	"4600003fGH",
+  ["c.ngt.s_3"] =	"4600003fVGH",
+
+  ["add.d_3"] =		"46200000FGH",
+  ["sub.d_3"] =		"46200001FGH",
+  ["mul.d_3"] =		"46200002FGH",
+  ["div.d_3"] =		"46200003FGH",
+  ["sqrt.d_2"] =	"46200004FG",
+  ["abs.d_2"] =		"46200005FG",
+  ["mov.d_2"] =		"46200006FG",
+  ["neg.d_2"] =		"46200007FG",
+  ["round.l.d_2"] =	"46200008FG",
+  ["trunc.l.d_2"] =	"46200009FG",
+  ["ceil.l.d_2"] =	"4620000aFG",
+  ["floor.l.d_2"] =	"4620000bFG",
+  ["round.w.d_2"] =	"4620000cFG",
+  ["trunc.w.d_2"] =	"4620000dFG",
+  ["ceil.w.d_2"] =	"4620000eFG",
+  ["floor.w.d_2"] =	"4620000fFG",
+  ["movf.d_2"] =	"46200011FG",
+  ["movf.d_3"] =	"46200011FGC",
+  ["movt.d_2"] =	"46210011FG",
+  ["movt.d_3"] =	"46210011FGC",
+  ["movz.d_3"] =	"46200012FGT",
+  ["movn.d_3"] =	"46200013FGT",
+  ["recip.d_2"] =	"46200015FG",
+  ["rsqrt.d_2"] =	"46200016FG",
+  ["cvt.s.d_2"] =	"46200020FG",
+  ["cvt.w.d_2"] =	"46200024FG",
+  ["cvt.l.d_2"] =	"46200025FG",
+  ["c.f.d_2"] =		"46200030GH",
+  ["c.f.d_3"] =		"46200030VGH",
+  ["c.un.d_2"] =	"46200031GH",
+  ["c.un.d_3"] =	"46200031VGH",
+  ["c.eq.d_2"] =	"46200032GH",
+  ["c.eq.d_3"] =	"46200032VGH",
+  ["c.ueq.d_2"] =	"46200033GH",
+  ["c.ueq.d_3"] =	"46200033VGH",
+  ["c.olt.d_2"] =	"46200034GH",
+  ["c.olt.d_3"] =	"46200034VGH",
+  ["c.ult.d_2"] =	"46200035GH",
+  ["c.ult.d_3"] =	"46200035VGH",
+  ["c.ole.d_2"] =	"46200036GH",
+  ["c.ole.d_3"] =	"46200036VGH",
+  ["c.ule.d_2"] =	"46200037GH",
+  ["c.ule.d_3"] =	"46200037VGH",
+  ["c.sf.d_2"] =	"46200038GH",
+  ["c.sf.d_3"] =	"46200038VGH",
+  ["c.ngle.d_2"] =	"46200039GH",
+  ["c.ngle.d_3"] =	"46200039VGH",
+  ["c.seq.d_2"] =	"4620003aGH",
+  ["c.seq.d_3"] =	"4620003aVGH",
+  ["c.ngl.d_2"] =	"4620003bGH",
+  ["c.ngl.d_3"] =	"4620003bVGH",
+  ["c.lt.d_2"] =	"4620003cGH",
+  ["c.lt.d_3"] =	"4620003cVGH",
+  ["c.nge.d_2"] =	"4620003dGH",
+  ["c.nge.d_3"] =	"4620003dVGH",
+  ["c.le.d_2"] =	"4620003eGH",
+  ["c.le.d_3"] =	"4620003eVGH",
+  ["c.ngt.d_2"] =	"4620003fGH",
+  ["c.ngt.d_3"] =	"4620003fVGH",
+
+  ["add.ps_3"] =	"46c00000FGH",
+  ["sub.ps_3"] =	"46c00001FGH",
+  ["mul.ps_3"] =	"46c00002FGH",
+  ["abs.ps_2"] =	"46c00005FG",
+  ["mov.ps_2"] =	"46c00006FG",
+  ["neg.ps_2"] =	"46c00007FG",
+  ["movf.ps_2"] =	"46c00011FG",
+  ["movf.ps_3"] =	"46c00011FGC",
+  ["movt.ps_2"] =	"46c10011FG",
+  ["movt.ps_3"] =	"46c10011FGC",
+  ["movz.ps_3"] =	"46c00012FGT",
+  ["movn.ps_3"] =	"46c00013FGT",
+  ["cvt.s.pu_2"] =	"46c00020FG",
+  ["cvt.s.pl_2"] =	"46c00028FG",
+  ["pll.ps_3"] =	"46c0002cFGH",
+  ["plu.ps_3"] =	"46c0002dFGH",
+  ["pul.ps_3"] =	"46c0002eFGH",
+  ["puu.ps_3"] =	"46c0002fFGH",
+  ["c.f.ps_2"] =	"46c00030GH",
+  ["c.f.ps_3"] =	"46c00030VGH",
+  ["c.un.ps_2"] =	"46c00031GH",
+  ["c.un.ps_3"] =	"46c00031VGH",
+  ["c.eq.ps_2"] =	"46c00032GH",
+  ["c.eq.ps_3"] =	"46c00032VGH",
+  ["c.ueq.ps_2"] =	"46c00033GH",
+  ["c.ueq.ps_3"] =	"46c00033VGH",
+  ["c.olt.ps_2"] =	"46c00034GH",
+  ["c.olt.ps_3"] =	"46c00034VGH",
+  ["c.ult.ps_2"] =	"46c00035GH",
+  ["c.ult.ps_3"] =	"46c00035VGH",
+  ["c.ole.ps_2"] =	"46c00036GH",
+  ["c.ole.ps_3"] =	"46c00036VGH",
+  ["c.ule.ps_2"] =	"46c00037GH",
+  ["c.ule.ps_3"] =	"46c00037VGH",
+  ["c.sf.ps_2"] =	"46c00038GH",
+  ["c.sf.ps_3"] =	"46c00038VGH",
+  ["c.ngle.ps_2"] =	"46c00039GH",
+  ["c.ngle.ps_3"] =	"46c00039VGH",
+  ["c.seq.ps_2"] =	"46c0003aGH",
+  ["c.seq.ps_3"] =	"46c0003aVGH",
+  ["c.ngl.ps_2"] =	"46c0003bGH",
+  ["c.ngl.ps_3"] =	"46c0003bVGH",
+  ["c.lt.ps_2"] =	"46c0003cGH",
+  ["c.lt.ps_3"] =	"46c0003cVGH",
+  ["c.nge.ps_2"] =	"46c0003dGH",
+  ["c.nge.ps_3"] =	"46c0003dVGH",
+  ["c.le.ps_2"] =	"46c0003eGH",
+  ["c.le.ps_3"] =	"46c0003eVGH",
+  ["c.ngt.ps_2"] =	"46c0003fGH",
+  ["c.ngt.ps_3"] =	"46c0003fVGH",
+
+  ["cvt.s.w_2"] =	"46800020FG",
+  ["cvt.d.w_2"] =	"46800021FG",
+
+  ["cvt.s.l_2"] =	"46a00020FG",
+  ["cvt.d.l_2"] =	"46a00021FG",
+
+  -- Opcode COP1X.
+  lwxc1_2 =		"4c000000FX",
+  ldxc1_2 =		"4c000001FX",
+  luxc1_2 =		"4c000005FX",
+  swxc1_2 =		"4c000008FX",
+  sdxc1_2 =		"4c000009FX",
+  suxc1_2 =		"4c00000dFX",
+  prefx_2 =		"4c00000fMX",
+  ["alnv.ps_4"] =	"4c00001eFGHS",
+  ["madd.s_4"] =	"4c000020FRGH",
+  ["madd.d_4"] =	"4c000021FRGH",
+  ["madd.ps_4"] =	"4c000026FRGH",
+  ["msub.s_4"] =	"4c000028FRGH",
+  ["msub.d_4"] =	"4c000029FRGH",
+  ["msub.ps_4"] =	"4c00002eFRGH",
+  ["nmadd.s_4"] =	"4c000030FRGH",
+  ["nmadd.d_4"] =	"4c000031FRGH",
+  ["nmadd.ps_4"] =	"4c000036FRGH",
+  ["nmsub.s_4"] =	"4c000038FRGH",
+  ["nmsub.d_4"] =	"4c000039FRGH",
+  ["nmsub.ps_4"] =	"4c00003eFRGH",
+}
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr)
+  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local r = match(expr, "^r([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r, tp end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_fpr(expr)
+  local r = match(expr, "^f([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+  local n = tonumber(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n then
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^[rf]([1-3]?[0-9])$") or
+	 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+local function parse_disp(disp)
+  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
+  if imm then
+    local r = shl(parse_gpr(reg), 21)
+    local extname = match(imm, "^extern%s+(%S+)$")
+    if extname then
+      waction("REL_EXT", map_extern[extname], nil, 1)
+      return r
+    else
+      return r + parse_imm(imm, 16, 0, 0, true)
+    end
+  end
+  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
+  if reg and tailr ~= "" then
+    local r, tp = parse_gpr(reg)
+    if tp then
+      waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr))
+      return shl(r, 21)
+    end
+  end
+  werror("bad displacement `"..disp.."'")
+end
+
+local function parse_index(idx)
+  local rt, rs = match(idx, "^(.*)%(([%w_:]+)%)$")
+  if rt then
+    rt = parse_gpr(rt)
+    rs = parse_gpr(rs)
+    return shl(rt, 16) + shl(rs, 21)
+  end
+  werror("bad index `"..idx.."'")
+end
+
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return sub(template, 9) end
+  local op = tonumber(sub(template, 1, 8), 16)
+  local n = 1
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 2 positions (ins/ext).
+  if secpos+2 > maxsecpos then wflush() end
+  local pos = wpos()
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    if p == "D" then
+      op = op + shl(parse_gpr(params[n]), 11); n = n + 1
+    elseif p == "T" then
+      op = op + shl(parse_gpr(params[n]), 16); n = n + 1
+    elseif p == "S" then
+      op = op + shl(parse_gpr(params[n]), 21); n = n + 1
+    elseif p == "F" then
+      op = op + shl(parse_fpr(params[n]), 6); n = n + 1
+    elseif p == "G" then
+      op = op + shl(parse_fpr(params[n]), 11); n = n + 1
+    elseif p == "H" then
+      op = op + shl(parse_fpr(params[n]), 16); n = n + 1
+    elseif p == "R" then
+      op = op + shl(parse_fpr(params[n]), 21); n = n + 1
+    elseif p == "I" then
+      op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1
+    elseif p == "U" then
+      op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1
+    elseif p == "O" then
+      op = op + parse_disp(params[n]); n = n + 1
+    elseif p == "X" then
+      op = op + parse_index(params[n]); n = n + 1
+    elseif p == "B" or p == "J" then
+      local mode, n, s = parse_label(params[n], false)
+      if p == "B" then n = n + 2048 end
+      waction("REL_"..mode, n, s, 1)
+      n = n + 1
+    elseif p == "A" then
+      op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
+    elseif p == "M" then
+      op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
+    elseif p == "N" then
+      op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
+    elseif p == "C" then
+      op = op + parse_imm(params[n], 3, 18, 0, false); n = n + 1
+    elseif p == "V" then
+      op = op + parse_imm(params[n], 3, 8, 0, false); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm(params[n], 3, 0, 0, false); n = n + 1
+    elseif p == "Y" then
+      op = op + parse_imm(params[n], 20, 6, 0, false); n = n + 1
+    elseif p == "Z" then
+      op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
+    elseif p == "=" then
+      op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo.
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 412 - 0
luajit.mod/luajit/dynasm/dasm_ppc.h

@@ -0,0 +1,412 @@
+/*
+** DynASM PPC encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"ppc"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);
+    if (action >= DASM__MAX) {
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+#ifdef DASM_CHECKS
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+#endif
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if (ins & 0x8000)
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: pos++; break;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
+	  goto patchrel;
+	case DASM_ALIGN:
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+	patchrel:
+	  CK((n & 3) == 0 &&
+	      (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
+	       ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
+	  cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
+	  break;
+	case DASM_LABEL_LG:
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1249 - 0
luajit.mod/luajit/dynasm/dasm_ppc.lua

@@ -0,0 +1,1249 @@
+------------------------------------------------------------------------------
+-- DynASM PPC module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+  arch =	"ppc",
+  description =	"DynASM PPC module",
+  version =	"1.3.0",
+  vernum =	 10300,
+  release =	"2011-05-05",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable = assert, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch = _s.match, _s.gmatch
+local concat, sort = table.concat, table.sort
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local tohex = bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(w * 0x10000 + (val or 0))
+  if a then actargs[#actargs+1] = a end
+  if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+  if n <= 0xffffff then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = ""
+  return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n
+  return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("Global labels:\n")
+  for i=20,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("enum {\n")
+  for i=20,next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n
+  map_extern_[n] = name
+  return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0,next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0,next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = { sp = "r1" } -- Ext. register name -> int. name.
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+  if s == "r1" then return "sp" end
+  return s
+end
+
+local map_cond = {
+  lt = 0, gt = 1, eq = 2, so = 3,
+  ge = 4, le = 5, ne = 6, ns = 7,
+}
+
+------------------------------------------------------------------------------
+
+-- Template strings for PPC instructions.
+local map_op = {
+  tdi_3 =	"08000000ARI",
+  twi_3 =	"0c000000ARI",
+  mulli_3 =	"1c000000RRI",
+  subfic_3 =	"20000000RRI",
+  cmplwi_3 =	"28000000XRU",
+  cmplwi_2 =	"28000000-RU",
+  cmpldi_3 =	"28200000XRU",
+  cmpldi_2 =	"28200000-RU",
+  cmpwi_3 =	"2c000000XRI",
+  cmpwi_2 =	"2c000000-RI",
+  cmpdi_3 =	"2c200000XRI",
+  cmpdi_2 =	"2c200000-RI",
+  addic_3 =	"30000000RRI",
+  ["addic._3"] = "34000000RRI",
+  addi_3 =	"38000000RR0I",
+  li_2 =	"38000000RI",
+  la_2 =	"38000000RD",
+  addis_3 =	"3c000000RR0I",
+  lis_2 =	"3c000000RI",
+  lus_2 =	"3c000000RU",
+  bc_3 =	"40000000AAK",
+  bcl_3 =	"40000001AAK",
+  bdnz_1 =	"42000000K",
+  bdz_1 =	"42400000K",
+  sc_0 =	"44000000",
+  b_1 =		"48000000J",
+  bl_1 =	"48000001J",
+  rlwimi_5 =	"50000000RR~AAA.",
+  rlwinm_5 =	"54000000RR~AAA.",
+  rlwnm_5 =	"5c000000RR~RAA.",
+  ori_3 =	"60000000RR~U",
+  nop_0 =	"60000000",
+  oris_3 =	"64000000RR~U",
+  xori_3 =	"68000000RR~U",
+  xoris_3 =	"6c000000RR~U",
+  ["andi._3"] =	"70000000RR~U",
+  ["andis._3"] = "74000000RR~U",
+  lwz_2 =	"80000000RD",
+  lwzu_2 =	"84000000RD",
+  lbz_2 =	"88000000RD",
+  lbzu_2 =	"8c000000RD",
+  stw_2 =	"90000000RD",
+  stwu_2 =	"94000000RD",
+  stb_2 =	"98000000RD",
+  stbu_2 =	"9c000000RD",
+  lhz_2 =	"a0000000RD",
+  lhzu_2 =	"a4000000RD",
+  lha_2 =	"a8000000RD",
+  lhau_2 =	"ac000000RD",
+  sth_2 =	"b0000000RD",
+  sthu_2 =	"b4000000RD",
+  lmw_2 =	"b8000000RD",
+  stmw_2 =	"bc000000RD",
+  lfs_2 =	"c0000000FD",
+  lfsu_2 =	"c4000000FD",
+  lfd_2 =	"c8000000FD",
+  lfdu_2 =	"cc000000FD",
+  stfs_2 =	"d0000000FD",
+  stfsu_2 =	"d4000000FD",
+  stfd_2 =	"d8000000FD",
+  stfdu_2 =	"dc000000FD",
+  ld_2 =	"e8000000RD", -- NYI: displacement must be divisible by 4.
+  ldu_2 =	"e8000001RD",
+  lwa_2 =	"e8000002RD",
+  std_2 =	"f8000000RD",
+  stdu_2 =	"f8000001RD",
+
+  -- Primary opcode 19:
+  mcrf_2 =	"4c000000XX",
+  isync_0 =	"4c00012c",
+  crnor_3 =	"4c000042CCC",
+  crnot_2 =	"4c000042CC=",
+  crandc_3 =	"4c000102CCC",
+  crxor_3 =	"4c000182CCC",
+  crclr_1 =	"4c000182C==",
+  crnand_3 =	"4c0001c2CCC",
+  crand_3 =	"4c000202CCC",
+  creqv_3 =	"4c000242CCC",
+  crset_1 =	"4c000242C==",
+  crorc_3 =	"4c000342CCC",
+  cror_3 =	"4c000382CCC",
+  crmove_2 =	"4c000382CC=",
+  bclr_2 =	"4c000020AA",
+  bclrl_2 =	"4c000021AA",
+  bcctr_2 =	"4c000420AA",
+  bcctrl_2 =	"4c000421AA",
+  blr_0 =	"4e800020",
+  blrl_0 =	"4e800021",
+  bctr_0 =	"4e800420",
+  bctrl_0 =	"4e800421",
+
+  -- Primary opcode 31:
+  cmpw_3 =	"7c000000XRR",
+  cmpw_2 =	"7c000000-RR",
+  cmpd_3 =	"7c200000XRR",
+  cmpd_2 =	"7c200000-RR",
+  tw_3 =	"7c000008ARR",
+  subfc_3 =	"7c000010RRR.",
+  subc_3 =	"7c000010RRR~.",
+  mulhdu_3 =	"7c000012RRR.",
+  addc_3 =	"7c000014RRR.",
+  mulhwu_3 =	"7c000016RRR.",
+  isel_4 =	"7c00001eRRRC",
+  isellt_3 =	"7c00001eRRR",
+  iselgt_3 =	"7c00005eRRR",
+  iseleq_3 =	"7c00009eRRR",
+  mfcr_1 =	"7c000026R",
+  mfocrf_2 =	"7c100026RG",
+  mtcrf_2 =	"7c000120GR",
+  mtocrf_2 =	"7c100120GR",
+  lwarx_3 =	"7c000028RR0R",
+  ldx_3 =	"7c00002aRR0R",
+  lwzx_3 =	"7c00002eRR0R",
+  slw_3 =	"7c000030RR~R.",
+  cntlzw_2 =	"7c000034RR~",
+  sld_3 =	"7c000036RR~R.",
+  and_3 =	"7c000038RR~R.",
+  cmplw_3 =	"7c000040XRR",
+  cmplw_2 =	"7c000040-RR",
+  cmpld_3 =	"7c200040XRR",
+  cmpld_2 =	"7c200040-RR",
+  subf_3 =	"7c000050RRR.",
+  sub_3 =	"7c000050RRR~.",
+  ldux_3 =	"7c00006aRR0R",
+  dcbst_2 =	"7c00006c-RR",
+  lwzux_3 =	"7c00006eRR0R",
+  cntlzd_2 =	"7c000074RR~",
+  andc_3 =	"7c000078RR~R.",
+  td_3 =	"7c000088ARR",
+  mulhd_3 =	"7c000092RRR.",
+  mulhw_3 =	"7c000096RRR.",
+  ldarx_3 =	"7c0000a8RR0R",
+  dcbf_2 =	"7c0000ac-RR",
+  lbzx_3 =	"7c0000aeRR0R",
+  neg_2 =	"7c0000d0RR.",
+  lbzux_3 =	"7c0000eeRR0R",
+  popcntb_2 =	"7c0000f4RR~",
+  not_2 =	"7c0000f8RR~%.",
+  nor_3 =	"7c0000f8RR~R.",
+  subfe_3 =	"7c000110RRR.",
+  sube_3 =	"7c000110RRR~.",
+  adde_3 =	"7c000114RRR.",
+  stdx_3 =	"7c00012aRR0R",
+  stwcx_3 =	"7c00012cRR0R.",
+  stwx_3 =	"7c00012eRR0R",
+  prtyw_2 =	"7c000134RR~",
+  stdux_3 =	"7c00016aRR0R",
+  stwux_3 =	"7c00016eRR0R",
+  prtyd_2 =	"7c000174RR~",
+  subfze_2 =	"7c000190RR.",
+  addze_2 =	"7c000194RR.",
+  stdcx_3 =	"7c0001acRR0R.",
+  stbx_3 =	"7c0001aeRR0R",
+  subfme_2 =	"7c0001d0RR.",
+  mulld_3 =	"7c0001d2RRR.",
+  addme_2 =	"7c0001d4RR.",
+  mullw_3 =	"7c0001d6RRR.",
+  dcbtst_2 =	"7c0001ec-RR",
+  stbux_3 =	"7c0001eeRR0R",
+  add_3 =	"7c000214RRR.",
+  dcbt_2 =	"7c00022c-RR",
+  lhzx_3 =	"7c00022eRR0R",
+  eqv_3 =	"7c000238RR~R.",
+  eciwx_3 =	"7c00026cRR0R",
+  lhzux_3 =	"7c00026eRR0R",
+  xor_3 =	"7c000278RR~R.",
+  mfspefscr_1 =	"7c0082a6R",
+  mfxer_1 =	"7c0102a6R",
+  mflr_1 =	"7c0802a6R",
+  mfctr_1 =	"7c0902a6R",
+  lwax_3 =	"7c0002aaRR0R",
+  lhax_3 =	"7c0002aeRR0R",
+  mftb_1 =	"7c0c42e6R",
+  mftbu_1 =	"7c0d42e6R",
+  lwaux_3 =	"7c0002eaRR0R",
+  lhaux_3 =	"7c0002eeRR0R",
+  sthx_3 =	"7c00032eRR0R",
+  orc_3 =	"7c000338RR~R.",
+  ecowx_3 =	"7c00036cRR0R",
+  sthux_3 =	"7c00036eRR0R",
+  or_3 =	"7c000378RR~R.",
+  mr_2 =	"7c000378RR~%.",
+  divdu_3 =	"7c000392RRR.",
+  divwu_3 =	"7c000396RRR.",
+  mtspefscr_1 =	"7c0083a6R",
+  mtxer_1 =	"7c0103a6R",
+  mtlr_1 =	"7c0803a6R",
+  mtctr_1 =	"7c0903a6R",
+  dcbi_2 =	"7c0003ac-RR",
+  nand_3 =	"7c0003b8RR~R.",
+  divd_3 =	"7c0003d2RRR.",
+  divw_3 =	"7c0003d6RRR.",
+  cmpb_3 =	"7c0003f8RR~R.",
+  mcrxr_1 =	"7c000400X",
+  subfco_3 =	"7c000410RRR.",
+  subco_3 =	"7c000410RRR~.",
+  addco_3 =	"7c000414RRR.",
+  ldbrx_3 =	"7c000428RR0R",
+  lswx_3 =	"7c00042aRR0R",
+  lwbrx_3 =	"7c00042cRR0R",
+  lfsx_3 =	"7c00042eFR0R",
+  srw_3 =	"7c000430RR~R.",
+  srd_3 =	"7c000436RR~R.",
+  subfo_3 =	"7c000450RRR.",
+  subo_3 =	"7c000450RRR~.",
+  lfsux_3 =	"7c00046eFR0R",
+  lswi_3 =	"7c0004aaRR0A",
+  sync_0 =	"7c0004ac",
+  lwsync_0 =	"7c2004ac",
+  ptesync_0 =	"7c4004ac",
+  lfdx_3 =	"7c0004aeFR0R",
+  nego_2 =	"7c0004d0RR.",
+  lfdux_3 =	"7c0004eeFR0R",
+  subfeo_3 =	"7c000510RRR.",
+  subeo_3 =	"7c000510RRR~.",
+  addeo_3 =	"7c000514RRR.",
+  stdbrx_3 =	"7c000528RR0R",
+  stswx_3 =	"7c00052aRR0R",
+  stwbrx_3 =	"7c00052cRR0R",
+  stfsx_3 =	"7c00052eFR0R",
+  stfsux_3 =	"7c00056eFR0R",
+  subfzeo_2 =	"7c000590RR.",
+  addzeo_2 =	"7c000594RR.",
+  stswi_3 =	"7c0005aaRR0A",
+  stfdx_3 =	"7c0005aeFR0R",
+  subfmeo_2 =	"7c0005d0RR.",
+  mulldo_3 =	"7c0005d2RRR.",
+  addmeo_2 =	"7c0005d4RR.",
+  mullwo_3 =	"7c0005d6RRR.",
+  dcba_2 =	"7c0005ec-RR",
+  stfdux_3 =	"7c0005eeFR0R",
+  addo_3 =	"7c000614RRR.",
+  lhbrx_3 =	"7c00062cRR0R",
+  sraw_3 =	"7c000630RR~R.",
+  srad_3 =	"7c000634RR~R.",
+  srawi_3 =	"7c000670RR~A.",
+  sradi_3 =	"7c000674RR~H.",
+  eieio_0 =	"7c0006ac",
+  lfiwax_3 =	"7c0006aeFR0R",
+  sthbrx_3 =	"7c00072cRR0R",
+  extsh_2 =	"7c000734RR~.",
+  extsb_2 =	"7c000774RR~.",
+  divduo_3 =	"7c000792RRR.",
+  divwou_3 =	"7c000796RRR.",
+  icbi_2 =	"7c0007ac-RR",
+  stfiwx_3 =	"7c0007aeFR0R",
+  extsw_2 =	"7c0007b4RR~.",
+  divdo_3 =	"7c0007d2RRR.",
+  divwo_3 =	"7c0007d6RRR.",
+  dcbz_2 =	"7c0007ec-RR",
+
+  -- Primary opcode 30:
+  rldicl_4 =	"78000000RR~HM.",
+  rldicr_4 =	"78000004RR~HM.",
+  rldic_4 =	"78000008RR~HM.",
+  rldimi_4 =	"7800000cRR~HM.",
+  rldcl_4 =	"78000010RR~RM.",
+  rldcr_4 =	"78000012RR~RM.",
+
+  -- Primary opcode 59:
+  fdivs_3 =	"ec000024FFF.",
+  fsubs_3 =	"ec000028FFF.",
+  fadds_3 =	"ec00002aFFF.",
+  fsqrts_2 =	"ec00002cF-F.",
+  fres_2 =	"ec000030F-F.",
+  fmuls_3 =	"ec000032FF-F.",
+  frsqrtes_2 =	"ec000034F-F.",
+  fmsubs_4 =	"ec000038FFFF~.",
+  fmadds_4 =	"ec00003aFFFF~.",
+  fnmsubs_4 =	"ec00003cFFFF~.",
+  fnmadds_4 =	"ec00003eFFFF~.",
+
+  -- Primary opcode 63:
+  fdiv_3 =	"fc000024FFF.",
+  fsub_3 =	"fc000028FFF.",
+  fadd_3 =	"fc00002aFFF.",
+  fsqrt_2 =	"fc00002cF-F.",
+  fsel_4 =	"fc00002eFFFF~.",
+  fre_2 =	"fc000030F-F.",
+  fmul_3 =	"fc000032FF-F.",
+  frsqrte_2 =	"fc000034F-F.",
+  fmsub_4 =	"fc000038FFFF~.",
+  fmadd_4 =	"fc00003aFFFF~.",
+  fnmsub_4 =	"fc00003cFFFF~.",
+  fnmadd_4 =	"fc00003eFFFF~.",
+  fcmpu_3 =	"fc000000XFF",
+  fcpsgn_3 =	"fc000010FFF.",
+  fcmpo_3 =	"fc000040XFF",
+  mtfsb1_1 =	"fc00004cA",
+  fneg_2 =	"fc000050F-F.",
+  mcrfs_2 =	"fc000080XX",
+  mtfsb0_1 =	"fc00008cA",
+  fmr_2 =	"fc000090F-F.",
+  frsp_2 =	"fc000018F-F.",
+  fctiw_2 =	"fc00001cF-F.",
+  fctiwz_2 =	"fc00001eF-F.",
+  mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
+  fnabs_2 =	"fc000110F-F.",
+  fabs_2 =	"fc000210F-F.",
+  frin_2 =	"fc000310F-F.",
+  friz_2 =	"fc000350F-F.",
+  frip_2 =	"fc000390F-F.",
+  frim_2 =	"fc0003d0F-F.",
+  mffs_1 =	"fc00048eF.",
+  -- NYI: mtfsf, mtfsb0, mtfsb1.
+  fctid_2 =	"fc00065cF-F.",
+  fctidz_2 =	"fc00065eF-F.",
+  fcfid_2 =	"fc00069cF-F.",
+
+  -- Primary opcode 4, SPE APU extension:
+  evaddw_3 =		"10000200RRR",
+  evaddiw_3 =		"10000202RAR~",
+  evsubw_3 =		"10000204RRR~",
+  evsubiw_3 =		"10000206RAR~",
+  evabs_2 =		"10000208RR",
+  evneg_2 =		"10000209RR",
+  evextsb_2 =		"1000020aRR",
+  evextsh_2 =		"1000020bRR",
+  evrndw_2 =		"1000020cRR",
+  evcntlzw_2 =		"1000020dRR",
+  evcntlsw_2 =		"1000020eRR",
+  brinc_3 =		"1000020fRRR",
+  evand_3 =		"10000211RRR",
+  evandc_3 =		"10000212RRR",
+  evxor_3 =		"10000216RRR",
+  evor_3 =		"10000217RRR",
+  evmr_2 =		"10000217RR=",
+  evnor_3 =		"10000218RRR",
+  evnot_2 =		"10000218RR=",
+  eveqv_3 =		"10000219RRR",
+  evorc_3 =		"1000021bRRR",
+  evnand_3 =		"1000021eRRR",
+  evsrwu_3 =		"10000220RRR",
+  evsrws_3 =		"10000221RRR",
+  evsrwiu_3 =		"10000222RRA",
+  evsrwis_3 =		"10000223RRA",
+  evslw_3 =		"10000224RRR",
+  evslwi_3 =		"10000226RRA",
+  evrlw_3 =		"10000228RRR",
+  evsplati_2 =		"10000229RS",
+  evrlwi_3 =		"1000022aRRA",
+  evsplatfi_2 =		"1000022bRS",
+  evmergehi_3 =		"1000022cRRR",
+  evmergelo_3 =		"1000022dRRR",
+  evcmpgtu_3 =		"10000230XRR",
+  evcmpgtu_2 =		"10000230-RR",
+  evcmpgts_3 =		"10000231XRR",
+  evcmpgts_2 =		"10000231-RR",
+  evcmpltu_3 =		"10000232XRR",
+  evcmpltu_2 =		"10000232-RR",
+  evcmplts_3 =		"10000233XRR",
+  evcmplts_2 =		"10000233-RR",
+  evcmpeq_3 =		"10000234XRR",
+  evcmpeq_2 =		"10000234-RR",
+  evsel_4 =		"10000278RRRW",
+  evsel_3 =		"10000278RRR",
+  evfsadd_3 =		"10000280RRR",
+  evfssub_3 =		"10000281RRR",
+  evfsabs_2 =		"10000284RR",
+  evfsnabs_2 =		"10000285RR",
+  evfsneg_2 =		"10000286RR",
+  evfsmul_3 =		"10000288RRR",
+  evfsdiv_3 =		"10000289RRR",
+  evfscmpgt_3 =		"1000028cXRR",
+  evfscmpgt_2 =		"1000028c-RR",
+  evfscmplt_3 =		"1000028dXRR",
+  evfscmplt_2 =		"1000028d-RR",
+  evfscmpeq_3 =		"1000028eXRR",
+  evfscmpeq_2 =		"1000028e-RR",
+  evfscfui_2 =		"10000290R-R",
+  evfscfsi_2 =		"10000291R-R",
+  evfscfuf_2 =		"10000292R-R",
+  evfscfsf_2 =		"10000293R-R",
+  evfsctui_2 =		"10000294R-R",
+  evfsctsi_2 =		"10000295R-R",
+  evfsctuf_2 =		"10000296R-R",
+  evfsctsf_2 =		"10000297R-R",
+  evfsctuiz_2 =		"10000298R-R",
+  evfsctsiz_2 =		"1000029aR-R",
+  evfststgt_3 =		"1000029cXRR",
+  evfststgt_2 =		"1000029c-RR",
+  evfststlt_3 =		"1000029dXRR",
+  evfststlt_2 =		"1000029d-RR",
+  evfststeq_3 =		"1000029eXRR",
+  evfststeq_2 =		"1000029e-RR",
+  efsadd_3 =		"100002c0RRR",
+  efssub_3 =		"100002c1RRR",
+  efsabs_2 =		"100002c4RR",
+  efsnabs_2 =		"100002c5RR",
+  efsneg_2 =		"100002c6RR",
+  efsmul_3 =		"100002c8RRR",
+  efsdiv_3 =		"100002c9RRR",
+  efscmpgt_3 =		"100002ccXRR",
+  efscmpgt_2 =		"100002cc-RR",
+  efscmplt_3 =		"100002cdXRR",
+  efscmplt_2 =		"100002cd-RR",
+  efscmpeq_3 =		"100002ceXRR",
+  efscmpeq_2 =		"100002ce-RR",
+  efscfd_2 =		"100002cfR-R",
+  efscfui_2 =		"100002d0R-R",
+  efscfsi_2 =		"100002d1R-R",
+  efscfuf_2 =		"100002d2R-R",
+  efscfsf_2 =		"100002d3R-R",
+  efsctui_2 =		"100002d4R-R",
+  efsctsi_2 =		"100002d5R-R",
+  efsctuf_2 =		"100002d6R-R",
+  efsctsf_2 =		"100002d7R-R",
+  efsctuiz_2 =		"100002d8R-R",
+  efsctsiz_2 =		"100002daR-R",
+  efststgt_3 =		"100002dcXRR",
+  efststgt_2 =		"100002dc-RR",
+  efststlt_3 =		"100002ddXRR",
+  efststlt_2 =		"100002dd-RR",
+  efststeq_3 =		"100002deXRR",
+  efststeq_2 =		"100002de-RR",
+  efdadd_3 =		"100002e0RRR",
+  efdsub_3 =		"100002e1RRR",
+  efdcfuid_2 =		"100002e2R-R",
+  efdcfsid_2 =		"100002e3R-R",
+  efdabs_2 =		"100002e4RR",
+  efdnabs_2 =		"100002e5RR",
+  efdneg_2 =		"100002e6RR",
+  efdmul_3 =		"100002e8RRR",
+  efddiv_3 =		"100002e9RRR",
+  efdctuidz_2 =		"100002eaR-R",
+  efdctsidz_2 =		"100002ebR-R",
+  efdcmpgt_3 =		"100002ecXRR",
+  efdcmpgt_2 =		"100002ec-RR",
+  efdcmplt_3 =		"100002edXRR",
+  efdcmplt_2 =		"100002ed-RR",
+  efdcmpeq_3 =		"100002eeXRR",
+  efdcmpeq_2 =		"100002ee-RR",
+  efdcfs_2 =		"100002efR-R",
+  efdcfui_2 =		"100002f0R-R",
+  efdcfsi_2 =		"100002f1R-R",
+  efdcfuf_2 =		"100002f2R-R",
+  efdcfsf_2 =		"100002f3R-R",
+  efdctui_2 =		"100002f4R-R",
+  efdctsi_2 =		"100002f5R-R",
+  efdctuf_2 =		"100002f6R-R",
+  efdctsf_2 =		"100002f7R-R",
+  efdctuiz_2 =		"100002f8R-R",
+  efdctsiz_2 =		"100002faR-R",
+  efdtstgt_3 =		"100002fcXRR",
+  efdtstgt_2 =		"100002fc-RR",
+  efdtstlt_3 =		"100002fdXRR",
+  efdtstlt_2 =		"100002fd-RR",
+  efdtsteq_3 =		"100002feXRR",
+  efdtsteq_2 =		"100002fe-RR",
+  evlddx_3 =		"10000300RR0R",
+  evldd_2 =		"10000301R8",
+  evldwx_3 =		"10000302RR0R",
+  evldw_2 =		"10000303R8",
+  evldhx_3 =		"10000304RR0R",
+  evldh_2 =		"10000305R8",
+  evlwhex_3 =		"10000310RR0R",
+  evlwhe_2 =		"10000311R4",
+  evlwhoux_3 =		"10000314RR0R",
+  evlwhou_2 =		"10000315R4",
+  evlwhosx_3 =		"10000316RR0R",
+  evlwhos_2 =		"10000317R4",
+  evstddx_3 =		"10000320RR0R",
+  evstdd_2 =		"10000321R8",
+  evstdwx_3 =		"10000322RR0R",
+  evstdw_2 =		"10000323R8",
+  evstdhx_3 =		"10000324RR0R",
+  evstdh_2 =		"10000325R8",
+  evstwhex_3 =		"10000330RR0R",
+  evstwhe_2 =		"10000331R4",
+  evstwhox_3 =		"10000334RR0R",
+  evstwho_2 =		"10000335R4",
+  evstwwex_3 =		"10000338RR0R",
+  evstwwe_2 =		"10000339R4",
+  evstwwox_3 =		"1000033cRR0R",
+  evstwwo_2 =		"1000033dR4",
+  evmhessf_3 =		"10000403RRR",
+  evmhossf_3 =		"10000407RRR",
+  evmheumi_3 =		"10000408RRR",
+  evmhesmi_3 =		"10000409RRR",
+  evmhesmf_3 =		"1000040bRRR",
+  evmhoumi_3 =		"1000040cRRR",
+  evmhosmi_3 =		"1000040dRRR",
+  evmhosmf_3 =		"1000040fRRR",
+  evmhessfa_3 =		"10000423RRR",
+  evmhossfa_3 =		"10000427RRR",
+  evmheumia_3 =		"10000428RRR",
+  evmhesmia_3 =		"10000429RRR",
+  evmhesmfa_3 =		"1000042bRRR",
+  evmhoumia_3 =		"1000042cRRR",
+  evmhosmia_3 =		"1000042dRRR",
+  evmhosmfa_3 =		"1000042fRRR",
+  evmwhssf_3 =		"10000447RRR",
+  evmwlumi_3 =		"10000448RRR",
+  evmwhumi_3 =		"1000044cRRR",
+  evmwhsmi_3 =		"1000044dRRR",
+  evmwhsmf_3 =		"1000044fRRR",
+  evmwssf_3 =		"10000453RRR",
+  evmwumi_3 =		"10000458RRR",
+  evmwsmi_3 =		"10000459RRR",
+  evmwsmf_3 =		"1000045bRRR",
+  evmwhssfa_3 =		"10000467RRR",
+  evmwlumia_3 =		"10000468RRR",
+  evmwhumia_3 =		"1000046cRRR",
+  evmwhsmia_3 =		"1000046dRRR",
+  evmwhsmfa_3 =		"1000046fRRR",
+  evmwssfa_3 =		"10000473RRR",
+  evmwumia_3 =		"10000478RRR",
+  evmwsmia_3 =		"10000479RRR",
+  evmwsmfa_3 =		"1000047bRRR",
+  evmra_2 =		"100004c4RR",
+  evdivws_3 =		"100004c6RRR",
+  evdivwu_3 =		"100004c7RRR",
+  evmwssfaa_3 =		"10000553RRR",
+  evmwumiaa_3 =		"10000558RRR",
+  evmwsmiaa_3 =		"10000559RRR",
+  evmwsmfaa_3 =		"1000055bRRR",
+  evmwssfan_3 =		"100005d3RRR",
+  evmwumian_3 =		"100005d8RRR",
+  evmwsmian_3 =		"100005d9RRR",
+  evmwsmfan_3 =		"100005dbRRR",
+  evmergehilo_3 =	"1000022eRRR",
+  evmergelohi_3 =	"1000022fRRR",
+  evlhhesplatx_3 =	"10000308RR0R",
+  evlhhesplat_2 =	"10000309R2",
+  evlhhousplatx_3 =	"1000030cRR0R",
+  evlhhousplat_2 =	"1000030dR2",
+  evlhhossplatx_3 =	"1000030eRR0R",
+  evlhhossplat_2 =	"1000030fR2",
+  evlwwsplatx_3 =	"10000318RR0R",
+  evlwwsplat_2 =	"10000319R4",
+  evlwhsplatx_3 =	"1000031cRR0R",
+  evlwhsplat_2 =	"1000031dR4",
+  evaddusiaaw_2 =	"100004c0RR",
+  evaddssiaaw_2 =	"100004c1RR",
+  evsubfusiaaw_2 =	"100004c2RR",
+  evsubfssiaaw_2 =	"100004c3RR",
+  evaddumiaaw_2 =	"100004c8RR",
+  evaddsmiaaw_2 =	"100004c9RR",
+  evsubfumiaaw_2 =	"100004caRR",
+  evsubfsmiaaw_2 =	"100004cbRR",
+  evmheusiaaw_3 =	"10000500RRR",
+  evmhessiaaw_3 =	"10000501RRR",
+  evmhessfaaw_3 =	"10000503RRR",
+  evmhousiaaw_3 =	"10000504RRR",
+  evmhossiaaw_3 =	"10000505RRR",
+  evmhossfaaw_3 =	"10000507RRR",
+  evmheumiaaw_3 =	"10000508RRR",
+  evmhesmiaaw_3 =	"10000509RRR",
+  evmhesmfaaw_3 =	"1000050bRRR",
+  evmhoumiaaw_3 =	"1000050cRRR",
+  evmhosmiaaw_3 =	"1000050dRRR",
+  evmhosmfaaw_3 =	"1000050fRRR",
+  evmhegumiaa_3 =	"10000528RRR",
+  evmhegsmiaa_3 =	"10000529RRR",
+  evmhegsmfaa_3 =	"1000052bRRR",
+  evmhogumiaa_3 =	"1000052cRRR",
+  evmhogsmiaa_3 =	"1000052dRRR",
+  evmhogsmfaa_3 =	"1000052fRRR",
+  evmwlusiaaw_3 =	"10000540RRR",
+  evmwlssiaaw_3 =	"10000541RRR",
+  evmwlumiaaw_3 =	"10000548RRR",
+  evmwlsmiaaw_3 =	"10000549RRR",
+  evmheusianw_3 =	"10000580RRR",
+  evmhessianw_3 =	"10000581RRR",
+  evmhessfanw_3 =	"10000583RRR",
+  evmhousianw_3 =	"10000584RRR",
+  evmhossianw_3 =	"10000585RRR",
+  evmhossfanw_3 =	"10000587RRR",
+  evmheumianw_3 =	"10000588RRR",
+  evmhesmianw_3 =	"10000589RRR",
+  evmhesmfanw_3 =	"1000058bRRR",
+  evmhoumianw_3 =	"1000058cRRR",
+  evmhosmianw_3 =	"1000058dRRR",
+  evmhosmfanw_3 =	"1000058fRRR",
+  evmhegumian_3 =	"100005a8RRR",
+  evmhegsmian_3 =	"100005a9RRR",
+  evmhegsmfan_3 =	"100005abRRR",
+  evmhogumian_3 =	"100005acRRR",
+  evmhogsmian_3 =	"100005adRRR",
+  evmhogsmfan_3 =	"100005afRRR",
+  evmwlusianw_3 =	"100005c0RRR",
+  evmwlssianw_3 =	"100005c1RRR",
+  evmwlumianw_3 =	"100005c8RRR",
+  evmwlsmianw_3 =	"100005c9RRR",
+
+  -- NYI: Book E instructions.
+}
+
+-- Add mnemonics for "." variants.
+do
+  local t = {}
+  for k,v in pairs(map_op) do
+    if sub(v, -1) == "." then
+      local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
+      t[sub(k, 1, -3).."."..sub(k, -2)] = v2
+    end
+  end
+  for k,v in pairs(t) do
+    map_op[k] = v
+  end
+end
+
+-- Add more branch mnemonics.
+for cond,c in pairs(map_cond) do
+  local b1 = "b"..cond
+  local c1 = shl(band(c, 3), 16) + (c < 4 and 0x01000000 or 0)
+  -- bX[l]
+  map_op[b1.."_1"] = tohex(0x40800000 + c1).."K"
+  map_op[b1.."y_1"] = tohex(0x40a00000 + c1).."K"
+  map_op[b1.."l_1"] = tohex(0x40800001 + c1).."K"
+  map_op[b1.."_2"] = tohex(0x40800000 + c1).."-XK"
+  map_op[b1.."y_2"] = tohex(0x40a00000 + c1).."-XK"
+  map_op[b1.."l_2"] = tohex(0x40800001 + c1).."-XK"
+  -- bXlr[l]
+  map_op[b1.."lr_0"] = tohex(0x4c800020 + c1)
+  map_op[b1.."lrl_0"] = tohex(0x4c800021 + c1)
+  map_op[b1.."ctr_0"] = tohex(0x4c800420 + c1)
+  map_op[b1.."ctrl_0"] = tohex(0x4c800421 + c1)
+  -- bXctr[l]
+  map_op[b1.."lr_1"] = tohex(0x4c800020 + c1).."-X"
+  map_op[b1.."lrl_1"] = tohex(0x4c800021 + c1).."-X"
+  map_op[b1.."ctr_1"] = tohex(0x4c800420 + c1).."-X"
+  map_op[b1.."ctrl_1"] = tohex(0x4c800421 + c1).."-X"
+end
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr)
+  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local r = match(expr, "^r([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r, tp end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_fpr(expr)
+  local r = match(expr, "^f([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_cr(expr)
+  local r = match(expr, "^cr([0-7])$")
+  if r then return tonumber(r) end
+  werror("bad condition register name `"..expr.."'")
+end
+
+local function parse_cond(expr)
+  local r, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$")
+  if r then
+    r = tonumber(r)
+    local c = map_cond[cond]
+    if c and c < 4 then return r*4+c end
+  end
+  werror("bad condition bit name `"..expr.."'")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+  local n = tonumber(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n then
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^r([1-3]?[0-9])$") or
+	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+local function parse_shiftmask(imm, isshift)
+  local n = tonumber(imm)
+  if n then
+    if shr(n, 6) == 0 then
+      local lsb = band(imm, 31)
+      local msb = imm - lsb
+      return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^r([1-3]?[0-9])$") or
+	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    werror("NYI: parameterized 64 bit shift/mask")
+  end
+end
+
+local function parse_disp(disp)
+  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
+  if imm then
+    local r = parse_gpr(reg)
+    if r == 0 then werror("cannot use r0 in displacement") end
+    return shl(r, 16) + parse_imm(imm, 16, 0, 0, true)
+  end
+  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
+  if reg and tailr ~= "" then
+    local r, tp = parse_gpr(reg)
+    if r == 0 then werror("cannot use r0 in displacement") end
+    if tp then
+      waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr))
+      return shl(r, 16)
+    end
+  end
+  werror("bad displacement `"..disp.."'")
+end
+
+local function parse_u5disp(disp, scale)
+  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
+  if imm then
+    local r = parse_gpr(reg)
+    if r == 0 then werror("cannot use r0 in displacement") end
+    return shl(r, 16) + parse_imm(imm, 5, 11, scale, false)
+  end
+  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
+  if reg and tailr ~= "" then
+    local r, tp = parse_gpr(reg)
+    if r == 0 then werror("cannot use r0 in displacement") end
+    if tp then
+      waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr))
+      return shl(r, 16)
+    end
+  end
+  werror("bad displacement `"..disp.."'")
+end
+
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return sub(template, 9) end
+  local op = tonumber(sub(template, 1, 8), 16)
+  local n, rs = 1, 26
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions (rlwinm).
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    if p == "R" then
+      rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
+    elseif p == "F" then
+      rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+    elseif p == "A" then
+      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
+    elseif p == "S" then
+      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1
+    elseif p == "I" then
+      op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1
+    elseif p == "U" then
+      op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1
+    elseif p == "D" then
+      op = op + parse_disp(params[n]); n = n + 1
+    elseif p == "2" then
+      op = op + parse_u5disp(params[n], 1); n = n + 1
+    elseif p == "4" then
+      op = op + parse_u5disp(params[n], 2); n = n + 1
+    elseif p == "8" then
+      op = op + parse_u5disp(params[n], 3); n = n + 1
+    elseif p == "C" then
+      rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
+    elseif p == "X" then
+      rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+    elseif p == "W" then
+      op = op + parse_cr(params[n]); n = n + 1
+    elseif p == "G" then
+      op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1
+    elseif p == "H" then
+      op = op + parse_shiftmask(params[n], true); n = n + 1
+    elseif p == "M" then
+      op = op + parse_shiftmask(params[n], false); n = n + 1
+    elseif p == "J" or p == "K" then
+      local mode, n, s = parse_label(params[n], false)
+      if p == "K" then n = n + 2048 end
+      waction("REL_"..mode, n, s, 1)
+      n = n + 1
+    elseif p == "0" then
+      if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
+    elseif p == "=" or p == "%" then
+      local t = band(shr(op, p == "%" and rs+5 or rs), 31)
+      rs = rs - 5
+      op = op + shl(t, rs)
+    elseif p == "~" then
+      local mm = shl(31, rs)
+      local lo = band(op, mm)
+      local hi = band(op, shl(mm, 5))
+      op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+    elseif p == "-" then
+      rs = rs - 5
+    elseif p == "." then
+      -- Ignored.
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 83 - 0
luajit.mod/luajit/dynasm/dasm_proto.h

@@ -0,0 +1,83 @@
+/*
+** DynASM encoding engine prototypes.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#ifndef _DASM_PROTO_H
+#define _DASM_PROTO_H
+
+#include <stddef.h>
+#include <stdarg.h>
+
+#define DASM_IDENT	"DynASM 1.3.0"
+#define DASM_VERSION	10300	/* 1.3.0 */
+
+#ifndef Dst_DECL
+#define Dst_DECL	dasm_State **Dst
+#endif
+
+#ifndef Dst_REF
+#define Dst_REF		(*Dst)
+#endif
+
+#ifndef DASM_FDEF
+#define DASM_FDEF	extern
+#endif
+
+#ifndef DASM_M_GROW
+#define DASM_M_GROW(ctx, t, p, sz, need) \
+  do { \
+    size_t _sz = (sz), _need = (need); \
+    if (_sz < _need) { \
+      if (_sz < 16) _sz = 16; \
+      while (_sz < _need) _sz += _sz; \
+      (p) = (t *)realloc((p), _sz); \
+      if ((p) == NULL) exit(1); \
+      (sz) = _sz; \
+    } \
+  } while(0)
+#endif
+
+#ifndef DASM_M_FREE
+#define DASM_M_FREE(ctx, p, sz)	free(p)
+#endif
+
+/* Internal DynASM encoder state. */
+typedef struct dasm_State dasm_State;
+
+
+/* Initialize and free DynASM state. */
+DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
+DASM_FDEF void dasm_free(Dst_DECL);
+
+/* Setup global array. Must be called before dasm_setup(). */
+DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
+
+/* Setup encoder. */
+DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);
+
+/* Feed encoder with actions. Calls are generated by pre-processor. */
+DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
+
+/* Link sections and return the resulting size. */
+DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
+
+/* Encode sections into buffer. */
+DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
+
+/* Get PC label offset. */
+DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
+#else
+#define dasm_checkstep(a, b)	0
+#endif
+
+
+#endif /* _DASM_PROTO_H */

+ 12 - 0
luajit.mod/luajit/dynasm/dasm_x64.lua

@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM x64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined x86/x64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+x64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_x86")

+ 471 - 0
luajit.mod/luajit/dynasm/dasm_x86.h

@@ -0,0 +1,471 @@
+/*
+** DynASM x86 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"x86"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. DASM_STOP must be 255. */
+enum {
+  DASM_DISP = 233,
+  DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
+  DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
+  DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
+  DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_VREG	0x15000000
+#define DASM_S_UNDEF_L		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned char *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+  int *b;
+
+  if (pos >= sec->epos) {
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;
+
+  va_start(ap, start);
+  while (1) {
+    int action = *p++;
+    if (action < DASM_DISP) {
+      ofs++;
+    } else if (action <= DASM_REL_A) {
+      int n = va_arg(ap, int);
+      b[pos++] = n;
+      switch (action) {
+      case DASM_DISP:
+	if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+      case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
+      case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
+      case DASM_IMM_D: ofs += 4; break;
+      case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
+      case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
+      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
+      case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
+      case DASM_SPACE: p++; ofs += n; break;
+      case DASM_SETLABEL: b[pos-2] = -0x40000000; break;  /* Neg. label ofs. */
+      case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
+	if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+      }
+      mrm = 4;
+    } else {
+      int *pl, n;
+      switch (action) {
+      case DASM_REL_LG:
+      case DASM_IMM_LG:
+	n = *p++; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl -= 246; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+      case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	ofs += 4;  /* Maximum offset needed. */
+	if (action == DASM_REL_LG || action == DASM_REL_PC)
+	  b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_ALIGN:
+	ofs += *p++;  /* Maximum alignment needed (arg is 2**n-1). */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_EXTERN: p += 2; ofs += 4; break;
+      case DASM_ESC: p++; ofs++; break;
+      case DASM_MARK: mrm = p[-2]; break;
+      case DASM_SECTION:
+	n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
+      case DASM_STOP: goto stop;
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	int op, action = *p++;
+	switch (action) {
+	case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
+	case DASM_REL_PC: op = p[-2]; rel_pc: {
+	  int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
+	  if (shrink) {  /* Shrinkable branch opcode? */
+	    int lofs, lpos = b[pos];
+	    if (lpos < 0) goto noshrink;  /* Ext global? */
+	    lofs = *DASM_POS2PTR(D, lpos);
+	    if (lpos > pos) {  /* Fwd label: add cumulative section offsets. */
+	      int i;
+	      for (i = secnum; i < DASM_POS2SEC(lpos); i++)
+		lofs += D->sections[i].ofs;
+	    } else {
+	      lofs -= ofs;  /* Bkwd label: unfix offset. */
+	    }
+	    lofs -= b[pos+1];  /* Short branch ok? */
+	    if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink;  /* Yes. */
+	    else { noshrink: shrink = 0; }  /* No, cannot shrink op. */
+	  }
+	  b[pos+1] = shrink;
+	  pos += 2;
+	  break;
+	}
+	case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
+	case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
+	case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
+	case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
+	case DASM_LABEL_LG: p++;
+	case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
+	case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
+	case DASM_EXTERN: p += 2; break;
+	case DASM_ESC: p++; break;
+	case DASM_MARK: break;
+	case DASM_SECTION: case DASM_STOP: goto stop;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#define dasmb(x)	*cp++ = (unsigned char)(x)
+#ifndef DASM_ALIGNED_WRITES
+#define dasmw(x) \
+  do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
+#define dasmd(x) \
+  do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
+#else
+#define dasmw(x)	do { dasmb(x); dasmb((x)>>8); } while (0)
+#define dasmd(x)	do { dasmw(x); dasmw((x)>>16); } while (0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  unsigned char *base = (unsigned char *)buffer;
+  unsigned char *cp = base;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      unsigned char *mark = NULL;
+      while (1) {
+	int action = *p++;
+	int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0;
+	switch (action) {
+	case DASM_DISP: if (!mark) mark = cp; {
+	  unsigned char *mm = mark;
+	  if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
+	  if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
+	    if (mrm != 5) { mm[-1] -= 0x80; break; } }
+	  if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
+	}
+	case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
+	case DASM_IMM_DB: if (((n+128)&-256) == 0) {
+	    db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
+	  } else mark = NULL;
+	case DASM_IMM_D: wd: dasmd(n); break;
+	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
+	case DASM_IMM_W: dasmw(n); break;
+	case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
+	  b++; n = (int)(ptrdiff_t)D->globals[-n];
+	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+	case DASM_REL_PC: rel_pc: {
+	  int shrink = *b++;
+	  int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
+	  n = *pb - ((int)(cp-base) + 4-shrink);
+	  if (shrink == 0) goto wd;
+	  if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
+	  goto wb;
+	}
+	case DASM_IMM_LG:
+	  p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+	case DASM_IMM_PC: {
+	  int *pb = DASM_POS2PTR(D, n);
+	  n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
+	  goto wd;
+	}
+	case DASM_LABEL_LG: {
+	  int idx = *p++;
+	  if (idx >= 10)
+	    D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
+	  break;
+	}
+	case DASM_LABEL_PC: case DASM_SETLABEL: break;
+	case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
+	case DASM_ALIGN:
+	  n = *p++;
+	  while (((cp-base) & n)) *cp++ = 0x90; /* nop */
+	  break;
+	case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
+	case DASM_MARK: mark = cp; break;
+	case DASM_ESC: action = *p++;
+	default: *cp++ = action; break;
+	case DASM_SECTION: case DASM_STOP: goto stop;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1945 - 0
luajit.mod/luajit/dynasm/dasm_x86.lua

@@ -0,0 +1,1945 @@
+------------------------------------------------------------------------------
+-- DynASM x86/x64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+local x64 = x64
+
+-- Module information:
+local _info = {
+  arch =	x64 and "x64" or "x86",
+  description =	"DynASM x86/x64 module",
+  version =	"1.3.0",
+  vernum =	 10300,
+  release =	"2011-05-05",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
+local concat, sort = table.concat, table.sort
+local bit = bit or require("bit")
+local band, shl, shr = bit.band, bit.lshift, bit.rshift
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  -- int arg, 1 buffer pos:
+  "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
+  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
+  "VREG", "SPACE", -- !x64: VREG support NYI.
+  -- ptrdiff_t arg, 1 buffer pos (address): !x64
+  "SETLABEL", "REL_A",
+  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
+  "REL_LG", "REL_PC",
+  -- action arg (1 byte) or int arg, 1 buffer pos (link):
+  "IMM_LG", "IMM_PC",
+  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
+  "LABEL_LG", "LABEL_PC",
+  -- action arg (1 byte), 1 buffer pos (offset):
+  "ALIGN",
+  -- action args (2 bytes), no buffer pos.
+  "EXTERN",
+  -- action arg (1 byte), no buffer pos.
+  "ESC",
+  -- no action arg, no buffer pos.
+  "MARK",
+  -- action arg (1 byte), no buffer pos, terminal action:
+  "SECTION",
+  -- no args, no buffer pos, terminal action:
+  "STOP"
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number (dynamically generated below).
+local map_action = {}
+-- First action number. Everything below does not need to be escaped.
+local actfirst = 256-#action_names
+
+-- Action list buffer and string (only used to remove dupes).
+local actlist = {}
+local actstr = ""
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Compute action numbers for action names.
+for n,name in ipairs(action_names) do
+  local num = actfirst + n - 1
+  map_action[name] = num
+end
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+  local nn = #actlist
+  local last = actlist[nn] or 255
+  actlist[nn] = nil -- Remove last byte.
+  if nn == 0 then nn = 1 end
+  out:write("static const unsigned char ", name, "[", nn, "] = {\n")
+  local s = "  "
+  for n,b in ipairs(actlist) do
+    s = s..b..","
+    if #s >= 75 then
+      assert(out:write(s, "\n"))
+      s = "  "
+    end
+  end
+  out:write(s, last, "\n};\n\n") -- Add last byte back.
+end
+
+------------------------------------------------------------------------------
+
+-- Add byte to action list.
+local function wputxb(n)
+  assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, a, num)
+  wputxb(assert(map_action[action], "bad action name `"..action.."'"))
+  if a then actargs[#actargs+1] = a end
+  if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Add call to embedded DynASM C code.
+local function wcall(func, args)
+  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
+end
+
+-- Delete duplicate action list chunks. A tad slow, but so what.
+local function dedupechunk(offset)
+  local al, as = actlist, actstr
+  local chunk = char(unpack(al, offset+1, #al))
+  local orig = find(as, chunk, 1, true)
+  if orig then
+    actargs[1] = orig-1 -- Replace with original offset.
+    for i=offset+1,#al do al[i] = nil end -- Kill dupe.
+  else
+    actstr = as..chunk
+  end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  local offset = actargs[1]
+  if #actlist == offset then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  dedupechunk(offset)
+  wcall("put", actargs) -- Add call to dasm_put().
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped byte.
+local function wputb(n)
+  if n >= actfirst then waction("ESC") end -- Need to escape byte.
+  wputxb(n)
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 10
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 246 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n
+  return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("Global labels:\n")
+  for i=10,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("enum {\n")
+  for i=10,next_global-1 do
+    out:write("  ", prefix, gsub(t[i], "@.*", ""), ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=10,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = -1
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n < -256 then werror("too many extern labels") end
+  next_extern = n - 1
+  t[name] = n
+  return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  local t = {}
+  for name, n in pairs(map_extern) do t[-n] = name end
+  out:write("Extern labels:\n")
+  for i=1,-next_extern-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+  local t = {}
+  for name, n in pairs(map_extern) do t[-n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=1,-next_extern-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = {}		-- Ext. register name -> int. name.
+local map_reg_rev = {}		-- Int. register name -> ext. name.
+local map_reg_num = {}		-- Int. register name -> register number.
+local map_reg_opsize = {}	-- Int. register name -> operand size.
+local map_reg_valid_base = {}	-- Int. register name -> valid base register?
+local map_reg_valid_index = {}	-- Int. register name -> valid index register?
+local map_reg_needrex = {}	-- Int. register name -> need rex vs. no rex.
+local reg_list = {}		-- Canonical list of int. register names.
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for _PTx macros).
+
+local addrsize = x64 and "q" or "d"	-- Size for address operands.
+
+-- Helper functions to fill register maps.
+local function mkrmap(sz, cl, names)
+  local cname = format("@%s", sz)
+  reg_list[#reg_list+1] = cname
+  map_archdef[cl] = cname
+  map_reg_rev[cname] = cl
+  map_reg_num[cname] = -1
+  map_reg_opsize[cname] = sz
+  if sz == addrsize or sz == "d" then
+    map_reg_valid_base[cname] = true
+    map_reg_valid_index[cname] = true
+  end
+  if names then
+    for n,name in ipairs(names) do
+      local iname = format("@%s%x", sz, n-1)
+      reg_list[#reg_list+1] = iname
+      map_archdef[name] = iname
+      map_reg_rev[iname] = name
+      map_reg_num[iname] = n-1
+      map_reg_opsize[iname] = sz
+      if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
+      if sz == addrsize or sz == "d" then
+	map_reg_valid_base[iname] = true
+	map_reg_valid_index[iname] = true
+      end
+    end
+  end
+  for i=0,(x64 and sz ~= "f") and 15 or 7 do
+    local needrex = sz == "b" and i > 3
+    local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
+    if needrex then map_reg_needrex[iname] = true end
+    local name
+    if sz == "o" then name = format("xmm%d", i)
+    elseif sz == "f" then name = format("st%d", i)
+    else name = format("r%d%s", i, sz == addrsize and "" or sz) end
+    map_archdef[name] = iname
+    if not map_reg_rev[iname] then
+      reg_list[#reg_list+1] = iname
+      map_reg_rev[iname] = name
+      map_reg_num[iname] = i
+      map_reg_opsize[iname] = sz
+      if sz == addrsize or sz == "d" then
+	map_reg_valid_base[iname] = true
+	map_reg_valid_index[iname] = true
+      end
+    end
+  end
+  reg_list[#reg_list+1] = ""
+end
+
+-- Integer registers (qword, dword, word and byte sized).
+if x64 then
+  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
+end
+mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
+mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
+mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
+map_reg_valid_index[map_archdef.esp] = false
+if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+map_archdef["Ra"] = "@"..addrsize
+
+-- FP registers (internally tword sized, but use "f" as operand size).
+mkrmap("f", "Rf")
+
+-- SSE registers (oword sized, but qword and dword accessible).
+mkrmap("o", "xmm")
+
+-- Operand size prefixes to codes.
+local map_opsize = {
+  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
+  aword = addrsize,
+}
+
+-- Operand size code to number.
+local map_opsizenum = {
+  b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+}
+
+-- Operand size code to name.
+local map_opsizename = {
+  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
+  f = "fpword",
+}
+
+-- Valid index register scale factors.
+local map_xsc = {
+  ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
+}
+
+-- Condition codes.
+local map_cc = {
+  o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
+  s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
+  c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
+  pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
+}
+
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+  return gsub(s, "@%w+", map_reg_rev)
+end
+
+-- Dump register names and numbers
+local function dumpregs(out)
+  out:write("Register names, sizes and internal numbers:\n")
+  for _,reg in ipairs(reg_list) do
+    if reg == "" then
+      out:write("\n")
+    else
+      local name = map_reg_rev[reg]
+      local num = map_reg_num[reg]
+      local opsize = map_opsizename[map_reg_opsize[reg]]
+      out:write(format("  %-5s %-8s %s\n", name, opsize,
+		       num < 0 and "(variable)" or num))
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
+local function wputlabel(aprefix, imm, num)
+  if type(imm) == "number" then
+    if imm < 0 then
+      waction("EXTERN")
+      wputxb(aprefix == "IMM_" and 0 or 1)
+      imm = -imm-1
+    else
+      waction(aprefix.."LG", nil, num);
+    end
+    wputxb(imm)
+  else
+    waction(aprefix.."PC", imm, num)
+  end
+end
+
+-- Put signed byte or arg.
+local function wputsbarg(n)
+  if type(n) == "number" then
+    if n < -128 or n > 127 then
+      werror("signed immediate byte out of range")
+    end
+    if n < 0 then n = n + 256 end
+    wputb(n)
+  else waction("IMM_S", n) end
+end
+
+-- Put unsigned byte or arg.
+local function wputbarg(n)
+  if type(n) == "number" then
+    if n < 0 or n > 255 then
+      werror("unsigned immediate byte out of range")
+    end
+    wputb(n)
+  else waction("IMM_B", n) end
+end
+
+-- Put unsigned word or arg.
+local function wputwarg(n)
+  if type(n) == "number" then
+    if shr(n, 16) ~= 0 then
+      werror("unsigned immediate word out of range")
+    end
+    wputb(band(n, 255)); wputb(shr(n, 8));
+  else waction("IMM_W", n) end
+end
+
+-- Put signed or unsigned dword or arg.
+local function wputdarg(n)
+  local tn = type(n)
+  if tn == "number" then
+    wputb(band(n, 255))
+    wputb(band(shr(n, 8), 255))
+    wputb(band(shr(n, 16), 255))
+    wputb(shr(n, 24))
+  elseif tn == "table" then
+    wputlabel("IMM_", n[1], 1)
+  else
+    waction("IMM_D", n)
+  end
+end
+
+-- Put operand-size dependent number or arg (defaults to dword).
+local function wputszarg(sz, n)
+  if not sz or sz == "d" or sz == "q" then wputdarg(n)
+  elseif sz == "w" then wputwarg(n)
+  elseif sz == "b" then wputbarg(n)
+  elseif sz == "s" then wputsbarg(n)
+  else werror("bad operand size") end
+end
+
+-- Put multi-byte opcode with operand-size dependent modifications.
+local function wputop(sz, op, rex)
+  local r
+  if rex ~= 0 and not x64 then werror("bad operand size") end
+  if sz == "w" then wputb(102) end
+  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
+  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
+  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
+  if op >= 65536 then
+    if rex ~= 0 then
+      local opc3 = band(op, 0xffff00)
+      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
+	wputb(64 + band(rex, 15)); rex = 0
+      end
+    end
+    wputb(shr(op, 16)); op = band(op, 0xffff)
+  end
+  if op >= 256 then
+    local b = shr(op, 8)
+    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
+    wputb(b)
+    op = band(op, 255)
+  end
+  if rex ~= 0 then wputb(64 + band(rex, 15)) end
+  if sz == "b" then op = op - 1 end
+  wputb(op)
+end
+
+-- Put ModRM or SIB formatted byte.
+local function wputmodrm(m, s, rm, vs, vrm)
+  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
+  wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
+end
+
+-- Put ModRM/SIB plus optional displacement.
+local function wputmrmsib(t, imark, s, vsreg)
+  local vreg, vxreg
+  local reg, xreg = t.reg, t.xreg
+  if reg and reg < 0 then reg = 0; vreg = t.vreg end
+  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
+  if s < 0 then s = 0 end
+
+  -- Register mode.
+  if sub(t.mode, 1, 1) == "r" then
+    wputmodrm(3, s, reg)
+    if vsreg then waction("VREG", vsreg); wputxb(2) end
+    if vreg then waction("VREG", vreg); wputxb(0) end
+    return
+  end
+
+  local disp = t.disp
+  local tdisp = type(disp)
+  -- No base register?
+  if not reg then
+    local riprel = false
+    if xreg then
+      -- Indexed mode with index register only.
+      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
+      wputmodrm(0, s, 4)
+      if imark == "I" then waction("MARK") end
+      if vsreg then waction("VREG", vsreg); wputxb(2) end
+      wputmodrm(t.xsc, xreg, 5)
+      if vxreg then waction("VREG", vxreg); wputxb(3) end
+    else
+      -- Pure 32 bit displacement.
+      if x64 and tdisp ~= "table" then
+	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+	if imark == "I" then waction("MARK") end
+	wputmodrm(0, 4, 5)
+      else
+	riprel = x64
+	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+	if imark == "I" then waction("MARK") end
+      end
+      if vsreg then waction("VREG", vsreg); wputxb(2) end
+    end
+    if riprel then -- Emit rip-relative displacement.
+      if match("UWSiI", imark) then
+	werror("NYI: rip-relative displacement followed by immediate")
+      end
+      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
+      wputlabel("REL_", disp[1], 2)
+    else
+      wputdarg(disp)
+    end
+    return
+  end
+
+  local m
+  if tdisp == "number" then -- Check displacement size at assembly time.
+    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
+      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
+    elseif disp >= -128 and disp <= 127 then m = 1
+    else m = 2 end
+  elseif tdisp == "table" then
+    m = 2
+  end
+
+  -- Index register present or esp as base register: need SIB encoding.
+  if xreg or band(reg, 7) == 4 then
+    wputmodrm(m or 2, s, 4) -- ModRM.
+    if m == nil or imark == "I" then waction("MARK") end
+    if vsreg then waction("VREG", vsreg); wputxb(2) end
+    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
+    if vxreg then waction("VREG", vxreg); wputxb(3) end
+    if vreg then waction("VREG", vreg); wputxb(1) end
+  else
+    wputmodrm(m or 2, s, reg) -- ModRM.
+    if (imark == "I" and (m == 1 or m == 2)) or
+       (m == nil and (vsreg or vreg)) then waction("MARK") end
+    if vsreg then waction("VREG", vsreg); wputxb(2) end
+    if vreg then waction("VREG", vreg); wputxb(1) end
+  end
+
+  -- Put displacement.
+  if m == 1 then wputsbarg(disp)
+  elseif m == 2 then wputdarg(disp)
+  elseif m == nil then waction("DISP", disp) end
+end
+
+------------------------------------------------------------------------------
+
+-- Return human-readable operand mode string.
+local function opmodestr(op, args)
+  local m = {}
+  for i=1,#args do
+    local a = args[i]
+    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
+  end
+  return op.." "..concat(m, ",")
+end
+
+-- Convert number to valid integer or nil.
+local function toint(expr)
+  local n = tonumber(expr)
+  if n then
+    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
+      werror("bad integer number `"..expr.."'")
+    end
+    return n
+  end
+end
+
+-- Parse immediate expression.
+local function immexpr(expr)
+  -- &expr (pointer)
+  if sub(expr, 1, 1) == "&" then
+    return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
+  end
+
+  local prefix = sub(expr, 1, 2)
+  -- =>expr (pc label reference)
+  if prefix == "=>" then
+    return "iJ", sub(expr, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "iJ", map_global[sub(expr, 3)]
+  end
+
+  -- [<>][1-9] (local label reference)
+  local dir, lnum = match(expr, "^([<>])([1-9])$")
+  if dir then -- Fwd: 247-255, Bkwd: 1-9.
+    return "iJ", lnum + (dir == ">" and 246 or 0)
+  end
+
+  local extname = match(expr, "^extern%s+(%S+)$")
+  if extname then
+    return "iJ", map_extern[extname]
+  end
+
+  -- expr (interpreted as immediate)
+  return "iI", expr
+end
+
+-- Parse displacement expression: +-num, +-expr, +-opsize*num
+local function dispexpr(expr)
+  local disp = expr == "" and 0 or toint(expr)
+  if disp then return disp end
+  local c, dispt = match(expr, "^([+-])%s*(.+)$")
+  if c == "+" then
+    expr = dispt
+  elseif not c then
+    werror("bad displacement expression `"..expr.."'")
+  end
+  local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
+  local ops, imm = map_opsize[opsize], toint(tailops)
+  if ops and imm then
+    if c == "-" then imm = -imm end
+    return imm*map_opsizenum[ops]
+  end
+  local mode, iexpr = immexpr(dispt)
+  if mode == "iJ" then
+    if c == "-" then werror("cannot invert label reference") end
+    return { iexpr }
+  end
+  return expr -- Need to return original signed expression.
+end
+
+-- Parse register or type expression.
+local function rtexpr(expr)
+  if not expr then return end
+  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    local rnum = map_reg_num[reg]
+    if not rnum then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    if not map_reg_valid_base[reg] then
+      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
+    end
+    return reg, rnum, tp
+  end
+  return expr, map_reg_num[expr]
+end
+
+-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
+local function parseoperand(param)
+  local t = {}
+
+  local expr = param
+  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
+  if opsize then
+    t.opsize = map_opsize[opsize]
+    if t.opsize then expr = tailops end
+  end
+
+  local br = match(expr, "^%[%s*(.-)%s*%]$")
+  repeat
+    if br then
+      t.mode = "xm"
+
+      -- [disp]
+      t.disp = toint(br)
+      if t.disp then
+	t.mode = x64 and "xm" or "xmO"
+	break
+      end
+
+      -- [reg...]
+      local tp
+      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
+      reg, t.reg, tp = rtexpr(reg)
+      if not t.reg then
+	-- [expr]
+	t.mode = x64 and "xm" or "xmO"
+	t.disp = dispexpr("+"..br)
+	break
+      end
+
+      if t.reg == -1 then
+	t.vreg, tailr = match(tailr, "^(%b())(.*)$")
+	if not t.vreg then werror("bad variable register expression") end
+      end
+
+      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
+      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
+      if xsc then
+	if not map_reg_valid_index[reg] then
+	  werror("bad index register `"..map_reg_rev[reg].."'")
+	end
+	t.xsc = map_xsc[xsc]
+	t.xreg = t.reg
+	t.vxreg = t.vreg
+	t.reg = nil
+	t.vreg = nil
+	t.disp = dispexpr(tailsc)
+	break
+      end
+      if not map_reg_valid_base[reg] then
+	werror("bad base register `"..map_reg_rev[reg].."'")
+      end
+
+      -- [reg] or [reg+-disp]
+      t.disp = toint(tailr) or (tailr == "" and 0)
+      if t.disp then break end
+
+      -- [reg+xreg...]
+      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
+      xreg, t.xreg, tp = rtexpr(xreg)
+      if not t.xreg then
+	-- [reg+-expr]
+	t.disp = dispexpr(tailr)
+	break
+      end
+      if not map_reg_valid_index[xreg] then
+	werror("bad index register `"..map_reg_rev[xreg].."'")
+      end
+
+      if t.xreg == -1 then
+	t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
+	if not t.vxreg then werror("bad variable register expression") end
+      end
+
+      -- [reg+xreg*xsc...]
+      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
+      if xsc then
+	t.xsc = map_xsc[xsc]
+	tailx = tailsc
+      end
+
+      -- [...] or [...+-disp] or [...+-expr]
+      t.disp = dispexpr(tailx)
+    else
+      -- imm or opsize*imm
+      local imm = toint(expr)
+      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
+	imm = toint(sub(expr, 2))
+	if imm then
+	  imm = imm * map_opsizenum[t.opsize]
+	  t.opsize = nil
+	end
+      end
+      if imm then
+	if t.opsize then werror("bad operand size override") end
+	local m = "i"
+	if imm == 1 then m = m.."1" end
+	if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
+	if imm >= -128 and imm <= 127 then m = m.."S" end
+	t.imm = imm
+	t.mode = m
+	break
+      end
+
+      local tp
+      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
+      reg, t.reg, tp = rtexpr(reg)
+      if t.reg then
+	if t.reg == -1 then
+	  t.vreg, tailr = match(tailr, "^(%b())(.*)$")
+	  if not t.vreg then werror("bad variable register expression") end
+	end
+	-- reg
+	if tailr == "" then
+	  if t.opsize then werror("bad operand size override") end
+	  t.opsize = map_reg_opsize[reg]
+	  if t.opsize == "f" then
+	    t.mode = t.reg == 0 and "fF" or "f"
+	  else
+	    if reg == "@w4" or (x64 and reg == "@d4") then
+	      wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
+	    end
+	    t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
+	  end
+	  t.needrex = map_reg_needrex[reg]
+	  break
+	end
+
+	-- type[idx], type[idx].field, type->field -> [reg+offset_expr]
+	if not tp then werror("bad operand `"..param.."'") end
+	t.mode = "xm"
+	t.disp = format(tp.ctypefmt, tailr)
+      else
+	t.mode, t.imm = immexpr(expr)
+	if sub(t.mode, -1) == "J" then
+	  if t.opsize and t.opsize ~= addrsize then
+	    werror("bad operand size override")
+	  end
+	  t.opsize = addrsize
+	end
+      end
+    end
+  until true
+  return t
+end
+
+------------------------------------------------------------------------------
+-- x86 Template String Description
+-- ===============================
+--
+-- Each template string is a list of [match:]pattern pairs,
+-- separated by "|". The first match wins. No match means a
+-- bad or unsupported combination of operand modes or sizes.
+--
+-- The match part and the ":" is omitted if the operation has
+-- no operands. Otherwise the first N characters are matched
+-- against the mode strings of each of the N operands.
+--
+-- The mode string for each operand type is (see parseoperand()):
+--   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
+--   FP register:      "f",  +"F" for st0
+--   Index operand:    "xm", +"O" for [disp] (pure offset)
+--   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
+--                     +"I" for arg, +"P" for pointer
+--   Any:              +"J" for valid jump targets
+--
+-- So a match character "m" (mixed) matches both an integer register
+-- and an index operand (to be encoded with the ModRM/SIB scheme).
+-- But "r" matches only a register and "x" only an index operand
+-- (e.g. for FP memory access operations).
+--
+-- The operand size match string starts right after the mode match
+-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
+-- The effective data size of the operation is matched against this list.
+--
+-- If only the regular "b", "w", "d", "q", "t" operand sizes are
+-- present, then all operands must be the same size. Unspecified sizes
+-- are ignored, but at least one operand must have a size or the pattern
+-- won't match (use the "byte", "word", "dword", "qword", "tword"
+-- operand size overrides. E.g.: mov dword [eax], 1).
+--
+-- If the list has a "1" or "2" prefix, the operand size is taken
+-- from the respective operand and any other operand sizes are ignored.
+-- If the list contains only ".", all operand sizes are ignored.
+-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
+-- are compared to the match.
+--
+-- E.g. "rrdw" matches for either two dword registers or two word
+-- registers. "Fx2dq" matches an st0 operand plus an index operand
+-- pointing to a dword (float) or qword (double).
+--
+-- Every character after the ":" is part of the pattern string:
+--   Hex chars are accumulated to form the opcode (left to right).
+--   "n"       disables the standard opcode mods
+--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
+--   "X"       Force REX.W.
+--   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
+--   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
+--             The spare 3 bits are either filled with the last hex digit or
+--             the result from a previous "r"/"R". The opcode is restored.
+--
+-- All of the following characters force a flush of the opcode:
+--   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+--   "S"       stores a signed 8 bit immediate from the last operand.
+--   "U"       stores an unsigned 8 bit immediate from the last operand.
+--   "W"       stores an unsigned 16 bit immediate from the last operand.
+--   "i"       stores an operand sized immediate from the last operand.
+--   "I"       dito, but generates an action code to optionally modify
+--             the opcode (+2) for a signed 8 bit immediate.
+--   "J"       generates one of the REL action codes from the last operand.
+--
+------------------------------------------------------------------------------
+
+-- Template strings for x86 instructions. Ordered by first opcode byte.
+-- Unimplemented opcodes (deliberate omissions) are marked with *.
+local map_op = {
+  -- 00-05: add...
+  -- 06: *push es
+  -- 07: *pop es
+  -- 08-0D: or...
+  -- 0E: *push cs
+  -- 0F: two byte opcode prefix
+  -- 10-15: adc...
+  -- 16: *push ss
+  -- 17: *pop ss
+  -- 18-1D: sbb...
+  -- 1E: *push ds
+  -- 1F: *pop ds
+  -- 20-25: and...
+  es_0 =	"26",
+  -- 27: *daa
+  -- 28-2D: sub...
+  cs_0 =	"2E",
+  -- 2F: *das
+  -- 30-35: xor...
+  ss_0 =	"36",
+  -- 37: *aaa
+  -- 38-3D: cmp...
+  ds_0 =	"3E",
+  -- 3F: *aas
+  inc_1 =	x64 and "m:FF0m" or "rdw:40r|m:FF0m",
+  dec_1 =	x64 and "m:FF1m" or "rdw:48r|m:FF1m",
+  push_1 =	(x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
+			 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
+  pop_1 =	x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
+  -- 60: *pusha, *pushad, *pushaw
+  -- 61: *popa, *popad, *popaw
+  -- 62: *bound rdw,x
+  -- 63: x86: *arpl mw,rw
+  movsxd_2 =	x64 and "rm/qd:63rM",
+  fs_0 =	"64",
+  gs_0 =	"65",
+  o16_0 =	"66",
+  a16_0 =	not x64 and "67" or nil,
+  a32_0 =	x64 and "67",
+  -- 68: push idw
+  -- 69: imul rdw,mdw,idw
+  -- 6A: push ib
+  -- 6B: imul rdw,mdw,S
+  -- 6C: *insb
+  -- 6D: *insd, *insw
+  -- 6E: *outsb
+  -- 6F: *outsd, *outsw
+  -- 70-7F: jcc lb
+  -- 80: add... mb,i
+  -- 81: add... mdw,i
+  -- 82: *undefined
+  -- 83: add... mdw,S
+  test_2 =	"mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
+  -- 86: xchg rb,mb
+  -- 87: xchg rdw,mdw
+  -- 88: mov mb,r
+  -- 89: mov mdw,r
+  -- 8A: mov r,mb
+  -- 8B: mov r,mdw
+  -- 8C: *mov mdw,seg
+  lea_2 =	"rx1dq:8DrM",
+  -- 8E: *mov seg,mdw
+  -- 8F: pop mdw
+  nop_0 =	"90",
+  xchg_2 =	"Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
+  cbw_0 =	"6698",
+  cwde_0 =	"98",
+  cdqe_0 =	"4898",
+  cwd_0 =	"6699",
+  cdq_0 =	"99",
+  cqo_0 =	"4899",
+  -- 9A: *call iw:idw
+  wait_0 =	"9B",
+  fwait_0 =	"9B",
+  pushf_0 =	"9C",
+  pushfd_0 =	not x64 and "9C",
+  pushfq_0 =	x64 and "9C",
+  popf_0 =	"9D",
+  popfd_0 =	not x64 and "9D",
+  popfq_0 =	x64 and "9D",
+  sahf_0 =	"9E",
+  lahf_0 =	"9F",
+  mov_2 =	"OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
+  movsb_0 =	"A4",
+  movsw_0 =	"66A5",
+  movsd_0 =	"A5",
+  cmpsb_0 =	"A6",
+  cmpsw_0 =	"66A7",
+  cmpsd_0 =	"A7",
+  -- A8: test Rb,i
+  -- A9: test Rdw,i
+  stosb_0 =	"AA",
+  stosw_0 =	"66AB",
+  stosd_0 =	"AB",
+  lodsb_0 =	"AC",
+  lodsw_0 =	"66AD",
+  lodsd_0 =	"AD",
+  scasb_0 =	"AE",
+  scasw_0 =	"66AF",
+  scasd_0 =	"AF",
+  -- B0-B7: mov rb,i
+  -- B8-BF: mov rdw,i
+  -- C0: rol... mb,i
+  -- C1: rol... mdw,i
+  ret_1 =	"i.:nC2W",
+  ret_0 =	"C3",
+  -- C4: *les rdw,mq
+  -- C5: *lds rdw,mq
+  -- C6: mov mb,i
+  -- C7: mov mdw,i
+  -- C8: *enter iw,ib
+  leave_0 =	"C9",
+  -- CA: *retf iw
+  -- CB: *retf
+  int3_0 =	"CC",
+  int_1 =	"i.:nCDU",
+  into_0 =	"CE",
+  -- CF: *iret
+  -- D0: rol... mb,1
+  -- D1: rol... mdw,1
+  -- D2: rol... mb,cl
+  -- D3: rol... mb,cl
+  -- D4: *aam ib
+  -- D5: *aad ib
+  -- D6: *salc
+  -- D7: *xlat
+  -- D8-DF: floating point ops
+  -- E0: *loopne
+  -- E1: *loope
+  -- E2: *loop
+  -- E3: *jcxz, *jecxz
+  -- E4: *in Rb,ib
+  -- E5: *in Rdw,ib
+  -- E6: *out ib,Rb
+  -- E7: *out ib,Rdw
+  call_1 =	x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
+  jmp_1 =	x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
+  -- EA: *jmp iw:idw
+  -- EB: jmp ib
+  -- EC: *in Rb,dx
+  -- ED: *in Rdw,dx
+  -- EE: *out dx,Rb
+  -- EF: *out dx,Rdw
+  lock_0 =	"F0",
+  int1_0 =	"F1",
+  repne_0 =	"F2",
+  repnz_0 =	"F2",
+  rep_0 =	"F3",
+  repe_0 =	"F3",
+  repz_0 =	"F3",
+  -- F4: *hlt
+  cmc_0 =	"F5",
+  -- F6: test... mb,i; div... mb
+  -- F7: test... mdw,i; div... mdw
+  clc_0 =	"F8",
+  stc_0 =	"F9",
+  -- FA: *cli
+  cld_0 =	"FC",
+  std_0 =	"FD",
+  -- FE: inc... mb
+  -- FF: inc... mdw
+
+  -- misc ops
+  not_1 =	"m:F72m",
+  neg_1 =	"m:F73m",
+  mul_1 =	"m:F74m",
+  imul_1 =	"m:F75m",
+  div_1 =	"m:F76m",
+  idiv_1 =	"m:F77m",
+
+  imul_2 =	"rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
+  imul_3 =	"rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
+
+  movzx_2 =	"rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
+  movsx_2 =	"rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
+
+  bswap_1 =	"rqd:0FC8r",
+  bsf_2 =	"rmqdw:0FBCrM",
+  bsr_2 =	"rmqdw:0FBDrM",
+  bt_2 =	"mrqdw:0FA3Rm|miqdw:0FBA4mU",
+  btc_2 =	"mrqdw:0FBBRm|miqdw:0FBA7mU",
+  btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
+  bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
+
+  shld_3 =	"mriqdw:0FA4RmU|mrCqdw:0FA5Rm",
+  shrd_3 =	"mriqdw:0FACRmU|mrCqdw:0FADRm",
+
+  rdtsc_0 =	"0F31", -- P1+
+  cpuid_0 =	"0FA2", -- P1+
+
+  -- floating point ops
+  fst_1 =	"ff:DDD0r|xd:D92m|xq:nDD2m",
+  fstp_1 =	"ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
+  fld_1 =	"ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
+
+  fpop_0 =	"DDD8", -- Alias for fstp st0.
+
+  fist_1 =	"xw:nDF2m|xd:DB2m",
+  fistp_1 =	"xw:nDF3m|xd:DB3m|xq:nDF7m",
+  fild_1 =	"xw:nDF0m|xd:DB0m|xq:nDF5m",
+
+  fxch_0 =	"D9C9",
+  fxch_1 =	"ff:D9C8r",
+  fxch_2 =	"fFf:D9C8r|Fff:D9C8R",
+
+  fucom_1 =	"ff:DDE0r",
+  fucom_2 =	"Fff:DDE0R",
+  fucomp_1 =	"ff:DDE8r",
+  fucomp_2 =	"Fff:DDE8R",
+  fucomi_1 =	"ff:DBE8r", -- P6+
+  fucomi_2 =	"Fff:DBE8R", -- P6+
+  fucomip_1 =	"ff:DFE8r", -- P6+
+  fucomip_2 =	"Fff:DFE8R", -- P6+
+  fcomi_1 =	"ff:DBF0r", -- P6+
+  fcomi_2 =	"Fff:DBF0R", -- P6+
+  fcomip_1 =	"ff:DFF0r", -- P6+
+  fcomip_2 =	"Fff:DFF0R", -- P6+
+  fucompp_0 =	"DAE9",
+  fcompp_0 =	"DED9",
+
+  fldenv_1 =	"x.:D94m",
+  fnstenv_1 =	"x.:D96m",
+  fstenv_1 =	"x.:9BD96m",
+  fldcw_1 =	"xw:nD95m",
+  fstcw_1 =	"xw:n9BD97m",
+  fnstcw_1 =	"xw:nD97m",
+  fstsw_1 =	"Rw:n9BDFE0|xw:n9BDD7m",
+  fnstsw_1 =	"Rw:nDFE0|xw:nDD7m",
+  fclex_0 =	"9BDBE2",
+  fnclex_0 =	"DBE2",
+
+  fnop_0 =	"D9D0",
+  -- D9D1-D9DF: unassigned
+
+  fchs_0 =	"D9E0",
+  fabs_0 =	"D9E1",
+  -- D9E2: unassigned
+  -- D9E3: unassigned
+  ftst_0 =	"D9E4",
+  fxam_0 =	"D9E5",
+  -- D9E6: unassigned
+  -- D9E7: unassigned
+  fld1_0 =	"D9E8",
+  fldl2t_0 =	"D9E9",
+  fldl2e_0 =	"D9EA",
+  fldpi_0 =	"D9EB",
+  fldlg2_0 =	"D9EC",
+  fldln2_0 =	"D9ED",
+  fldz_0 =	"D9EE",
+  -- D9EF: unassigned
+
+  f2xm1_0 =	"D9F0",
+  fyl2x_0 =	"D9F1",
+  fptan_0 =	"D9F2",
+  fpatan_0 =	"D9F3",
+  fxtract_0 =	"D9F4",
+  fprem1_0 =	"D9F5",
+  fdecstp_0 =	"D9F6",
+  fincstp_0 =	"D9F7",
+  fprem_0 =	"D9F8",
+  fyl2xp1_0 =	"D9F9",
+  fsqrt_0 =	"D9FA",
+  fsincos_0 =	"D9FB",
+  frndint_0 =	"D9FC",
+  fscale_0 =	"D9FD",
+  fsin_0 =	"D9FE",
+  fcos_0 =	"D9FF",
+
+  -- SSE, SSE2
+  andnpd_2 =	"rmo:660F55rM",
+  andnps_2 =	"rmo:0F55rM",
+  andpd_2 =	"rmo:660F54rM",
+  andps_2 =	"rmo:0F54rM",
+  clflush_1 =	"x.:0FAE7m",
+  cmppd_3 =	"rmio:660FC2rMU",
+  cmpps_3 =	"rmio:0FC2rMU",
+  cmpsd_3 =	"rrio:F20FC2rMU|rxi/oq:",
+  cmpss_3 =	"rrio:F30FC2rMU|rxi/od:",
+  comisd_2 =	"rro:660F2FrM|rx/oq:",
+  comiss_2 =	"rro:0F2FrM|rx/od:",
+  cvtdq2pd_2 =	"rro:F30FE6rM|rx/oq:",
+  cvtdq2ps_2 =	"rmo:0F5BrM",
+  cvtpd2dq_2 =	"rmo:F20FE6rM",
+  cvtpd2ps_2 =	"rmo:660F5ArM",
+  cvtpi2pd_2 =	"rx/oq:660F2ArM",
+  cvtpi2ps_2 =	"rx/oq:0F2ArM",
+  cvtps2dq_2 =	"rmo:660F5BrM",
+  cvtps2pd_2 =	"rro:0F5ArM|rx/oq:",
+  cvtsd2si_2 =	"rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
+  cvtsd2ss_2 =	"rro:F20F5ArM|rx/oq:",
+  cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
+  cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
+  cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
+  cvtss2si_2 =	"rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
+  cvttpd2dq_2 =	"rmo:660FE6rM",
+  cvttps2dq_2 =	"rmo:F30F5BrM",
+  cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
+  cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
+  fxsave_1 =	"x.:0FAE0m",
+  fxrstor_1 =	"x.:0FAE1m",
+  ldmxcsr_1 =	"xd:0FAE2m",
+  lfence_0 =	"0FAEE8",
+  maskmovdqu_2 = "rro:660FF7rM",
+  mfence_0 =	"0FAEF0",
+  movapd_2 =	"rmo:660F28rM|mro:660F29Rm",
+  movaps_2 =	"rmo:0F28rM|mro:0F29Rm",
+  movd_2 =	"rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
+  movdqa_2 =	"rmo:660F6FrM|mro:660F7FRm",
+  movdqu_2 =	"rmo:F30F6FrM|mro:F30F7FRm",
+  movhlps_2 =	"rro:0F12rM",
+  movhpd_2 =	"rx/oq:660F16rM|xr/qo:n660F17Rm",
+  movhps_2 =	"rx/oq:0F16rM|xr/qo:n0F17Rm",
+  movlhps_2 =	"rro:0F16rM",
+  movlpd_2 =	"rx/oq:660F12rM|xr/qo:n660F13Rm",
+  movlps_2 =	"rx/oq:0F12rM|xr/qo:n0F13Rm",
+  movmskpd_2 =	"rr/do:660F50rM",
+  movmskps_2 =	"rr/do:0F50rM",
+  movntdq_2 =	"xro:660FE7Rm",
+  movnti_2 =	"xrqd:0FC3Rm",
+  movntpd_2 =	"xro:660F2BRm",
+  movntps_2 =	"xro:0F2BRm",
+  movq_2 =	"rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
+  movsd_2 =	"rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
+  movss_2 =	"rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
+  movupd_2 =	"rmo:660F10rM|mro:660F11Rm",
+  movups_2 =	"rmo:0F10rM|mro:0F11Rm",
+  orpd_2 =	"rmo:660F56rM",
+  orps_2 =	"rmo:0F56rM",
+  packssdw_2 =	"rmo:660F6BrM",
+  packsswb_2 =	"rmo:660F63rM",
+  packuswb_2 =	"rmo:660F67rM",
+  paddb_2 =	"rmo:660FFCrM",
+  paddd_2 =	"rmo:660FFErM",
+  paddq_2 =	"rmo:660FD4rM",
+  paddsb_2 =	"rmo:660FECrM",
+  paddsw_2 =	"rmo:660FEDrM",
+  paddusb_2 =	"rmo:660FDCrM",
+  paddusw_2 =	"rmo:660FDDrM",
+  paddw_2 =	"rmo:660FFDrM",
+  pand_2 =	"rmo:660FDBrM",
+  pandn_2 =	"rmo:660FDFrM",
+  pause_0 =	"F390",
+  pavgb_2 =	"rmo:660FE0rM",
+  pavgw_2 =	"rmo:660FE3rM",
+  pcmpeqb_2 =	"rmo:660F74rM",
+  pcmpeqd_2 =	"rmo:660F76rM",
+  pcmpeqw_2 =	"rmo:660F75rM",
+  pcmpgtb_2 =	"rmo:660F64rM",
+  pcmpgtd_2 =	"rmo:660F66rM",
+  pcmpgtw_2 =	"rmo:660F65rM",
+  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
+  pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
+  pmaddwd_2 =	"rmo:660FF5rM",
+  pmaxsw_2 =	"rmo:660FEErM",
+  pmaxub_2 =	"rmo:660FDErM",
+  pminsw_2 =	"rmo:660FEArM",
+  pminub_2 =	"rmo:660FDArM",
+  pmovmskb_2 =	"rr/do:660FD7rM",
+  pmulhuw_2 =	"rmo:660FE4rM",
+  pmulhw_2 =	"rmo:660FE5rM",
+  pmullw_2 =	"rmo:660FD5rM",
+  pmuludq_2 =	"rmo:660FF4rM",
+  por_2 =	"rmo:660FEBrM",
+  prefetchnta_1 = "xb:n0F180m",
+  prefetcht0_1 = "xb:n0F181m",
+  prefetcht1_1 = "xb:n0F182m",
+  prefetcht2_1 = "xb:n0F183m",
+  psadbw_2 =	"rmo:660FF6rM",
+  pshufd_3 =	"rmio:660F70rMU",
+  pshufhw_3 =	"rmio:F30F70rMU",
+  pshuflw_3 =	"rmio:F20F70rMU",
+  pslld_2 =	"rmo:660FF2rM|rio:660F726mU",
+  pslldq_2 =	"rio:660F737mU",
+  psllq_2 =	"rmo:660FF3rM|rio:660F736mU",
+  psllw_2 =	"rmo:660FF1rM|rio:660F716mU",
+  psrad_2 =	"rmo:660FE2rM|rio:660F724mU",
+  psraw_2 =	"rmo:660FE1rM|rio:660F714mU",
+  psrld_2 =	"rmo:660FD2rM|rio:660F722mU",
+  psrldq_2 =	"rio:660F733mU",
+  psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
+  psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
+  psubb_2 =	"rmo:660FF8rM",
+  psubd_2 =	"rmo:660FFArM",
+  psubq_2 =	"rmo:660FFBrM",
+  psubsb_2 =	"rmo:660FE8rM",
+  psubsw_2 =	"rmo:660FE9rM",
+  psubusb_2 =	"rmo:660FD8rM",
+  psubusw_2 =	"rmo:660FD9rM",
+  psubw_2 =	"rmo:660FF9rM",
+  punpckhbw_2 =	"rmo:660F68rM",
+  punpckhdq_2 =	"rmo:660F6ArM",
+  punpckhqdq_2 = "rmo:660F6DrM",
+  punpckhwd_2 =	"rmo:660F69rM",
+  punpcklbw_2 =	"rmo:660F60rM",
+  punpckldq_2 =	"rmo:660F62rM",
+  punpcklqdq_2 = "rmo:660F6CrM",
+  punpcklwd_2 =	"rmo:660F61rM",
+  pxor_2 =	"rmo:660FEFrM",
+  rcpps_2 =	"rmo:0F53rM",
+  rcpss_2 =	"rro:F30F53rM|rx/od:",
+  rsqrtps_2 =	"rmo:0F52rM",
+  rsqrtss_2 =	"rmo:F30F52rM",
+  sfence_0 =	"0FAEF8",
+  shufpd_3 =	"rmio:660FC6rMU",
+  shufps_3 =	"rmio:0FC6rMU",
+  stmxcsr_1 =   "xd:0FAE3m",
+  ucomisd_2 =	"rro:660F2ErM|rx/oq:",
+  ucomiss_2 =	"rro:0F2ErM|rx/od:",
+  unpckhpd_2 =	"rmo:660F15rM",
+  unpckhps_2 =	"rmo:0F15rM",
+  unpcklpd_2 =	"rmo:660F14rM",
+  unpcklps_2 =	"rmo:0F14rM",
+  xorpd_2 =	"rmo:660F57rM",
+  xorps_2 =	"rmo:0F57rM",
+
+  -- SSE3 ops
+  fisttp_1 =	"xw:nDF1m|xd:DB1m|xq:nDD1m",
+  addsubpd_2 =	"rmo:660FD0rM",
+  addsubps_2 =	"rmo:F20FD0rM",
+  haddpd_2 =	"rmo:660F7CrM",
+  haddps_2 =	"rmo:F20F7CrM",
+  hsubpd_2 =	"rmo:660F7DrM",
+  hsubps_2 =	"rmo:F20F7DrM",
+  lddqu_2 =	"rxo:F20FF0rM",
+  movddup_2 =	"rmo:F20F12rM",
+  movshdup_2 =	"rmo:F30F16rM",
+  movsldup_2 =	"rmo:F30F12rM",
+
+  -- SSSE3 ops
+  pabsb_2 =	"rmo:660F381CrM",
+  pabsd_2 =	"rmo:660F381ErM",
+  pabsw_2 =	"rmo:660F381DrM",
+  palignr_3 =	"rmio:660F3A0FrMU",
+  phaddd_2 =	"rmo:660F3802rM",
+  phaddsw_2 =	"rmo:660F3803rM",
+  phaddw_2 =	"rmo:660F3801rM",
+  phsubd_2 =	"rmo:660F3806rM",
+  phsubsw_2 =	"rmo:660F3807rM",
+  phsubw_2 =	"rmo:660F3805rM",
+  pmaddubsw_2 =	"rmo:660F3804rM",
+  pmulhrsw_2 =	"rmo:660F380BrM",
+  pshufb_2 =	"rmo:660F3800rM",
+  psignb_2 =	"rmo:660F3808rM",
+  psignd_2 =	"rmo:660F380ArM",
+  psignw_2 =	"rmo:660F3809rM",
+
+  -- SSE4.1 ops
+  blendpd_3 =	"rmio:660F3A0DrMU",
+  blendps_3 =	"rmio:660F3A0CrMU",
+  blendvpd_3 =	"rmRo:660F3815rM",
+  blendvps_3 =	"rmRo:660F3814rM",
+  dppd_3 =	"rmio:660F3A41rMU",
+  dpps_3 =	"rmio:660F3A40rMU",
+  extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
+  insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
+  movntdqa_2 =	"rmo:660F382ArM",
+  mpsadbw_3 =	"rmio:660F3A42rMU",
+  packusdw_2 =	"rmo:660F382BrM",
+  pblendvb_3 =	"rmRo:660F3810rM",
+  pblendw_3 =	"rmio:660F3A0ErMU",
+  pcmpeqq_2 =	"rmo:660F3829rM",
+  pextrb_3 =	"rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
+  pextrd_3 =	"mri/do:660F3A16RmU",
+  pextrq_3 =	"mri/qo:660F3A16RmU",
+  -- pextrw is SSE2, mem operand is SSE4.1 only
+  phminposuw_2 = "rmo:660F3841rM",
+  pinsrb_3 =	"rri/od:660F3A20nrMU|rxi/ob:",
+  pinsrd_3 =	"rmi/od:660F3A22rMU",
+  pinsrq_3 =	"rmi/oq:660F3A22rXMU",
+  pmaxsb_2 =	"rmo:660F383CrM",
+  pmaxsd_2 =	"rmo:660F383DrM",
+  pmaxud_2 =	"rmo:660F383FrM",
+  pmaxuw_2 =	"rmo:660F383ErM",
+  pminsb_2 =	"rmo:660F3838rM",
+  pminsd_2 =	"rmo:660F3839rM",
+  pminud_2 =	"rmo:660F383BrM",
+  pminuw_2 =	"rmo:660F383ArM",
+  pmovsxbd_2 =	"rro:660F3821rM|rx/od:",
+  pmovsxbq_2 =	"rro:660F3822rM|rx/ow:",
+  pmovsxbw_2 =	"rro:660F3820rM|rx/oq:",
+  pmovsxdq_2 =	"rro:660F3825rM|rx/oq:",
+  pmovsxwd_2 =	"rro:660F3823rM|rx/oq:",
+  pmovsxwq_2 =	"rro:660F3824rM|rx/od:",
+  pmovzxbd_2 =	"rro:660F3831rM|rx/od:",
+  pmovzxbq_2 =	"rro:660F3832rM|rx/ow:",
+  pmovzxbw_2 =	"rro:660F3830rM|rx/oq:",
+  pmovzxdq_2 =	"rro:660F3835rM|rx/oq:",
+  pmovzxwd_2 =	"rro:660F3833rM|rx/oq:",
+  pmovzxwq_2 =	"rro:660F3834rM|rx/od:",
+  pmuldq_2 =	"rmo:660F3828rM",
+  pmulld_2 =	"rmo:660F3840rM",
+  ptest_2 =	"rmo:660F3817rM",
+  roundpd_3 =	"rmio:660F3A09rMU",
+  roundps_3 =	"rmio:660F3A08rMU",
+  roundsd_3 =	"rrio:660F3A0BrMU|rxi/oq:",
+  roundss_3 =	"rrio:660F3A0ArMU|rxi/od:",
+
+  -- SSE4.2 ops
+  crc32_2 =	"rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
+  pcmpestri_3 =	"rmio:660F3A61rMU",
+  pcmpestrm_3 =	"rmio:660F3A60rMU",
+  pcmpgtq_2 =	"rmo:660F3837rM",
+  pcmpistri_3 =	"rmio:660F3A63rMU",
+  pcmpistrm_3 =	"rmio:660F3A62rMU",
+  popcnt_2 =	"rmqdw:F30FB8rM",
+
+  -- SSE4a
+  extrq_2 =	"rro:660F79rM",
+  extrq_3 =	"riio:660F780mUU",
+  insertq_2 =	"rro:F20F79rM",
+  insertq_4 =	"rriio:F20F78rMUU",
+  lzcnt_2 =	"rmqdw:F30FBDrM",
+  movntsd_2 =	"xr/qo:nF20F2BRm",
+  movntss_2 =	"xr/do:F30F2BRm",
+  -- popcnt is also in SSE4.2
+}
+
+------------------------------------------------------------------------------
+
+-- Arithmetic ops.
+for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
+		     ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
+  local n8 = shl(n, 3)
+  map_op[name.."_2"] = format(
+    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
+    1+n8, 3+n8, n, n, 5+n8, n)
+end
+
+-- Shift ops.
+for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
+		     shl = 4, shr = 5,          sar = 7, sal = 4 } do
+  map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
+end
+
+-- Conditional ops.
+for cc,n in pairs(map_cc) do
+  map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
+  map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
+  map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
+end
+
+-- FP arithmetic ops.
+for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
+		     sub = 4, subr = 5, div = 6, divr = 7 } do
+  local nc = 0xc0 + shl(n, 3)
+  local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
+  local fn = "f"..name
+  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
+  if n == 2 or n == 3 then
+    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
+  else
+    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
+    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
+    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
+  end
+  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
+end
+
+-- FP conditional moves.
+for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
+  local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6)
+  map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
+  map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
+end
+
+-- SSE FP arithmetic ops.
+for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
+		     sub = 12, min = 13, div = 14, max = 15 } do
+  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
+  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
+  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
+  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+end
+
+------------------------------------------------------------------------------
+
+-- Process pattern string.
+local function dopattern(pat, args, sz, op, needrex)
+  local digit, addin
+  local opcode = 0
+  local szov = sz
+  local narg = 1
+  local rex = 0
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 5 positions.
+  if secpos+5 > maxsecpos then wflush() end
+
+  -- Process each character.
+  for c in gmatch(pat.."|", ".") do
+    if match(c, "%x") then	-- Hex digit.
+      digit = byte(c) - 48
+      if digit > 48 then digit = digit - 39
+      elseif digit > 16 then digit = digit - 7 end
+      opcode = opcode*16 + digit
+      addin = nil
+    elseif c == "n" then	-- Disable operand size mods for opcode.
+      szov = nil
+    elseif c == "X" then	-- Force REX.W.
+      rex = 8
+    elseif c == "r" then	-- Merge 1st operand regno. into opcode.
+      addin = args[1]; opcode = opcode + (addin.reg % 8)
+      if narg < 2 then narg = 2 end
+    elseif c == "R" then	-- Merge 2nd operand regno. into opcode.
+      addin = args[2]; opcode = opcode + (addin.reg % 8)
+      narg = 3
+    elseif c == "m" or c == "M" then	-- Encode ModRM/SIB.
+      local s
+      if addin then
+	s = addin.reg
+	opcode = opcode - band(s, 7)	-- Undo regno opcode merge.
+      else
+	s = band(opcode, 15)	-- Undo last digit.
+	opcode = shr(opcode, 4)
+      end
+      local nn = c == "m" and 1 or 2
+      local t = args[nn]
+      if narg <= nn then narg = nn + 1 end
+      if szov == "q" and rex == 0 then rex = rex + 8 end
+      if t.reg and t.reg > 7 then rex = rex + 1 end
+      if t.xreg and t.xreg > 7 then rex = rex + 2 end
+      if s > 7 then rex = rex + 4 end
+      if needrex then rex = rex + 16 end
+      wputop(szov, opcode, rex); opcode = nil
+      local imark = sub(pat, -1) -- Force a mark (ugly).
+      -- Put ModRM/SIB with regno/last digit as spare.
+      wputmrmsib(t, imark, s, addin and addin.vreg)
+      addin = nil
+    else
+      if opcode then -- Flush opcode.
+	if szov == "q" and rex == 0 then rex = rex + 8 end
+	if needrex then rex = rex + 16 end
+	if addin and addin.reg == -1 then
+	  wputop(szov, opcode - 7, rex)
+	  waction("VREG", addin.vreg); wputxb(0)
+	else
+	  if addin and addin.reg > 7 then rex = rex + 1 end
+	  wputop(szov, opcode, rex)
+	end
+	opcode = nil
+      end
+      if c == "|" then break end
+      if c == "o" then -- Offset (pure 32 bit displacement).
+	wputdarg(args[1].disp); if narg < 2 then narg = 2 end
+      elseif c == "O" then
+	wputdarg(args[2].disp); narg = 3
+      else
+	-- Anything else is an immediate operand.
+	local a = args[narg]
+	narg = narg + 1
+	local mode, imm = a.mode, a.imm
+	if mode == "iJ" and not match("iIJ", c) then
+	  werror("bad operand size for label")
+	end
+	if c == "S" then
+	  wputsbarg(imm)
+	elseif c == "U" then
+	  wputbarg(imm)
+	elseif c == "W" then
+	  wputwarg(imm)
+	elseif c == "i" or c == "I" then
+	  if mode == "iJ" then
+	    wputlabel("IMM_", imm, 1)
+	  elseif mode == "iI" and c == "I" then
+	    waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
+	  else
+	    wputszarg(sz, imm)
+	  end
+	elseif c == "J" then
+	  if mode == "iPJ" then
+	    waction("REL_A", imm) -- !x64 (secpos)
+	  else
+	    wputlabel("REL_", imm, 2)
+	  end
+	else
+	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
+	end
+      end
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Mapping of operand modes to short names. Suppress output with '#'.
+local map_modename = {
+  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
+  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
+  I = "#", S = "#", O = "#",
+}
+
+-- Return a table/string showing all possible operand modes.
+local function templatehelp(template, nparams)
+  if nparams == 0 then return "" end
+  local t = {}
+  for tm in gmatch(template, "[^%|]+") do
+    local s = map_modename[sub(tm, 1, 1)]
+    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
+      return ", "..map_modename[c]
+    end)
+    if not match(s, "#") then t[#t+1] = s end
+  end
+  return t
+end
+
+-- Match operand modes against mode match part of template.
+local function matchtm(tm, args)
+  for i=1,#args do
+    if not match(args[i].mode, sub(tm, i, i)) then return end
+  end
+  return true
+end
+
+-- Handle opcodes defined with template strings.
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return templatehelp(template, nparams) end
+  local args = {}
+
+  -- Zero-operand opcodes have no match part.
+  if #params == 0 then
+    dopattern(template, args, "d", params.op, nil)
+    return
+  end
+
+  -- Determine common operand size (coerce undefined size) or flag as mixed.
+  local sz, szmix, needrex
+  for i,p in ipairs(params) do
+    args[i] = parseoperand(p)
+    local nsz = args[i].opsize
+    if nsz then
+      if sz and sz ~= nsz then szmix = true else sz = nsz end
+    end
+    local nrex = args[i].needrex
+    if nrex ~= nil then
+      if needrex == nil then
+	needrex = nrex
+      elseif needrex ~= nrex then
+	werror("bad mix of byte-addressable registers")
+      end
+    end
+  end
+
+  -- Try all match:pattern pairs (separated by '|').
+  local gotmatch, lastpat
+  for tm in gmatch(template, "[^%|]+") do
+    -- Split off size match (starts after mode match) and pattern string.
+    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
+    if pat == "" then pat = lastpat else lastpat = pat end
+    if matchtm(tm, args) then
+      local prefix = sub(szm, 1, 1)
+      if prefix == "/" then -- Match both operand sizes.
+	if args[1].opsize == sub(szm, 2, 2) and
+	   args[2].opsize == sub(szm, 3, 3) then
+	  dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+	  return
+	end
+      else -- Match common operand size.
+	local szp = sz
+	if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
+	if prefix == "1" then szp = args[1].opsize; szmix = nil
+	elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
+	if not szmix and (prefix == "." or match(szm, szp or "#")) then
+	  dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
+	  return
+	end
+      end
+      gotmatch = true
+    end
+  end
+
+  local msg = "bad operand mode"
+  if gotmatch then
+    if szmix then
+      msg = "mixed operand size"
+    else
+      msg = sz and "bad operand size" or "missing operand size"
+    end
+  end
+
+  werror(msg.." in `"..opmodestr(params.op, args).."'")
+end
+
+------------------------------------------------------------------------------
+
+-- x64-specific opcode for 64 bit immediates and displacements.
+if x64 then
+  function map_op.mov64_2(params)
+    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
+    if secpos+2 > maxsecpos then wflush() end
+    local opcode, op64, sz, rex, vreg
+    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
+    if op64 then
+      local a = parseoperand(params[2])
+      if a.mode ~= "rmR" then werror("bad operand mode") end
+      sz = a.opsize
+      rex = sz == "q" and 8 or 0
+      opcode = 0xa3
+    else
+      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
+      local a = parseoperand(params[1])
+      if op64 then
+	if a.mode ~= "rmR" then werror("bad operand mode") end
+	sz = a.opsize
+	rex = sz == "q" and 8 or 0
+	opcode = 0xa1
+      else
+	if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
+	  werror("bad operand mode")
+	end
+	op64 = params[2]
+	if a.reg == -1 then
+	  vreg = a.vreg
+	  opcode = 0xb8
+	else
+	  opcode = 0xb8 + band(a.reg, 7)
+	end
+	rex = a.reg > 7 and 9 or 8
+      end
+    end
+    wputop(sz, opcode, rex)
+    if vreg then waction("VREG", vreg); wputxb(0) end
+    waction("IMM_D", format("(unsigned int)(%s)", op64))
+    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+local function op_data(params)
+  if not params then return "imm..." end
+  local sz = sub(params.op, 2, 2)
+  if sz == "a" then sz = addrsize end
+  for _,p in ipairs(params) do
+    local a = parseoperand(p)
+    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
+      werror("bad mode or size in `"..p.."'")
+    end
+    if a.mode == "iJ" then
+      wputlabel("IMM_", a.imm, 1)
+    else
+      wputszarg(sz, a.imm)
+    end
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+map_op[".byte_*"] = op_data
+map_op[".sbyte_*"] = op_data
+map_op[".word_*"] = op_data
+map_op[".dword_*"] = op_data
+map_op[".aword_*"] = op_data
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_2"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr  [, addr]" end
+  if secpos+2 > maxsecpos then wflush() end
+  local a = parseoperand(params[1])
+  local mode, imm = a.mode, a.imm
+  if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
+    -- Local label (1: ... 9:) or global label (->global:).
+    waction("LABEL_LG", nil, 1)
+    wputxb(imm)
+  elseif mode == "iJ" then
+    -- PC label (=>pcexpr:).
+    waction("LABEL_PC", imm)
+  else
+    werror("bad label definition")
+  end
+  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
+  local addr = params[2]
+  if addr then
+    local a = parseoperand(addr)
+    if a.mode == "iPJ" then
+      waction("SETLABEL", a.imm)
+    else
+      werror("bad label assignment")
+    end
+  end
+end
+map_op[".label_1"] = map_op[".label_2"]
+
+------------------------------------------------------------------------------
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", nil, 1)
+	wputxb(align-1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+-- Spacing pseudo-opcode.
+map_op[".space_2"] = function(params)
+  if not params then return "num [, filler]" end
+  if secpos+1 > maxsecpos then wflush() end
+  waction("SPACE", params[1])
+  local fill = params[2]
+  if fill then
+    fill = tonumber(fill)
+    if not fill or fill < 0 or fill > 255 then werror("bad filler") end
+  end
+  wputxb(fill or 0)
+end
+map_op[".space_1"] = map_op[".space_2"]
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  if reg and not map_reg_valid_base[reg] then
+    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg and map_reg_rev[tp.reg] or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION")
+  wputxb(num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpregs(out)
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 1094 - 0
luajit.mod/luajit/dynasm/dynasm.lua

@@ -0,0 +1,1094 @@
+------------------------------------------------------------------------------
+-- DynASM. A dynamic assembler for code generation engines.
+-- Originally designed and implemented for LuaJIT.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See below for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Application information.
+local _info = {
+  name =	"DynASM",
+  description =	"A dynamic assembler for code generation engines",
+  version =	"1.3.0",
+  vernum =	 10300,
+  release =	"2011-05-05",
+  author =	"Mike Pall",
+  url =		"http://luajit.org/dynasm.html",
+  license =	"MIT",
+  copyright =	[[
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+]],
+}
+
+-- Cache library functions.
+local type, pairs, ipairs = type, pairs, ipairs
+local pcall, error, assert = pcall, error, assert
+local _s = string
+local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub
+local format, rep, upper = _s.format, _s.rep, _s.upper
+local _t = table
+local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort
+local exit = os.exit
+local io = io
+local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+-- Program options.
+local g_opt = {}
+
+-- Global state for current file.
+local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch
+local g_errcount = 0
+
+-- Write buffer for output file.
+local g_wbuffer, g_capbuffer
+
+------------------------------------------------------------------------------
+
+-- Write an output line (or callback function) to the buffer.
+local function wline(line, needindent)
+  local buf = g_capbuffer or g_wbuffer
+  buf[#buf+1] = needindent and g_indent..line or line
+  g_synclineno = g_synclineno + 1
+end
+
+-- Write assembler line as a comment, if requestd.
+local function wcomment(aline)
+  if g_opt.comment then
+    wline(g_opt.comment..aline..g_opt.endcomment, true)
+  end
+end
+
+-- Resync CPP line numbers.
+local function wsync()
+  if g_synclineno ~= g_lineno and g_opt.cpp then
+    wline("#line "..g_lineno..' "'..g_fname..'"')
+    g_synclineno = g_lineno
+  end
+end
+
+-- Dummy action flush function. Replaced with arch-specific function later.
+local function wflush(term)
+end
+
+-- Dump all buffered output lines.
+local function wdumplines(out, buf)
+  for _,line in ipairs(buf) do
+    if type(line) == "string" then
+      assert(out:write(line, "\n"))
+    else
+      -- Special callback to dynamically insert lines after end of processing.
+      line(out)
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Emit an error. Processing continues with next statement.
+local function werror(msg)
+  error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0)
+end
+
+-- Emit a fatal error. Processing stops.
+local function wfatal(msg)
+  g_errcount = "fatal"
+  werror(msg)
+end
+
+-- Print a warning. Processing continues.
+local function wwarn(msg)
+  stderr:write(format("%s:%s: warning: %s:\n%s\n",
+    g_fname, g_lineno, msg, g_curline))
+end
+
+-- Print caught error message. But suppress excessive errors.
+local function wprinterr(...)
+  if type(g_errcount) == "number" then
+    -- Regular error.
+    g_errcount = g_errcount + 1
+    if g_errcount < 21 then -- Seems to be a reasonable limit.
+      stderr:write(...)
+    elseif g_errcount == 21 then
+      stderr:write(g_fname,
+	":*: warning: too many errors (suppressed further messages).\n")
+    end
+  else
+    -- Fatal error.
+    stderr:write(...)
+    return true -- Stop processing.
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Map holding all option handlers.
+local opt_map = {}
+local opt_current
+
+-- Print error and exit with error status.
+local function opterror(...)
+  stderr:write("dynasm.lua: ERROR: ", ...)
+  stderr:write("\n")
+  exit(1)
+end
+
+-- Get option parameter.
+local function optparam(args)
+  local argn = args.argn
+  local p = args[argn]
+  if not p then
+    opterror("missing parameter for option `", opt_current, "'.")
+  end
+  args.argn = argn + 1
+  return p
+end
+
+------------------------------------------------------------------------------
+
+-- Core pseudo-opcodes.
+local map_coreop = {}
+-- Dummy opcode map. Replaced by arch-specific map.
+local map_op = {}
+
+-- Forward declarations.
+local dostmt
+local readfile
+
+------------------------------------------------------------------------------
+
+-- Map for defines (initially empty, chains to arch-specific map).
+local map_def = {}
+
+-- Pseudo-opcode to define a substitution.
+map_coreop[".define_2"] = function(params, nparams)
+  if not params then return nparams == 1 and "name" or "name, subst" end
+  local name, def = params[1], params[2] or "1"
+  if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
+  map_def[name] = def
+end
+map_coreop[".define_1"] = map_coreop[".define_2"]
+
+-- Define a substitution on the command line.
+function opt_map.D(args)
+  local namesubst = optparam(args)
+  local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$")
+  if name then
+    map_def[name] = subst
+  elseif match(namesubst, "^[%a_][%w_]*$") then
+    map_def[namesubst] = "1"
+  else
+    opterror("bad define")
+  end
+end
+
+-- Undefine a substitution on the command line.
+function opt_map.U(args)
+  local name = optparam(args)
+  if match(name, "^[%a_][%w_]*$") then
+    map_def[name] = nil
+  else
+    opterror("bad define")
+  end
+end
+
+-- Helper for definesubst.
+local gotsubst
+
+local function definesubst_one(word)
+  local subst = map_def[word]
+  if subst then gotsubst = word; return subst else return word end
+end
+
+-- Iteratively substitute defines.
+local function definesubst(stmt)
+  -- Limit number of iterations.
+  for i=1,100 do
+    gotsubst = false
+    stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
+    if not gotsubst then break end
+  end
+  if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
+  return stmt
+end
+
+-- Dump all defines.
+local function dumpdefines(out, lvl)
+  local t = {}
+  for name in pairs(map_def) do
+    t[#t+1] = name
+  end
+  sort(t)
+  out:write("Defines:\n")
+  for _,name in ipairs(t) do
+    local subst = map_def[name]
+    if g_arch then subst = g_arch.revdef(subst) end
+    out:write(format("  %-20s %s\n", name, subst))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for conditional assembly.
+local condlevel = 0
+local condstack = {}
+
+-- Evaluate condition with a Lua expression. Substitutions already performed.
+local function cond_eval(cond)
+  local func, err
+  if setfenv then
+    func, err = loadstring("return "..cond, "=expr")
+  else
+    -- No globals. All unknown identifiers evaluate to nil.
+    func, err = load("return "..cond, "=expr", "t", {})
+  end
+  if func then
+    if setfenv then
+      setfenv(func, {}) -- No globals. All unknown identifiers evaluate to nil.
+    end
+    local ok, res = pcall(func)
+    if ok then
+      if res == 0 then return false end -- Oh well.
+      return not not res
+    end
+    err = res
+  end
+  wfatal("bad condition: "..err)
+end
+
+-- Skip statements until next conditional pseudo-opcode at the same level.
+local function stmtskip()
+  local dostmt_save = dostmt
+  local lvl = 0
+  dostmt = function(stmt)
+    local op = match(stmt, "^%s*(%S+)")
+    if op == ".if" then
+      lvl = lvl + 1
+    elseif lvl ~= 0 then
+      if op == ".endif" then lvl = lvl - 1 end
+    elseif op == ".elif" or op == ".else" or op == ".endif" then
+      dostmt = dostmt_save
+      dostmt(stmt)
+    end
+  end
+end
+
+-- Pseudo-opcodes for conditional assembly.
+map_coreop[".if_1"] = function(params)
+  if not params then return "condition" end
+  local lvl = condlevel + 1
+  local res = cond_eval(params[1])
+  condlevel = lvl
+  condstack[lvl] = res
+  if not res then stmtskip() end
+end
+
+map_coreop[".elif_1"] = function(params)
+  if not params then return "condition" end
+  if condlevel == 0 then wfatal(".elif without .if") end
+  local lvl = condlevel
+  local res = condstack[lvl]
+  if res then
+    if res == "else" then wfatal(".elif after .else") end
+  else
+    res = cond_eval(params[1])
+    if res then
+      condstack[lvl] = res
+      return
+    end
+  end
+  stmtskip()
+end
+
+map_coreop[".else_0"] = function(params)
+  if condlevel == 0 then wfatal(".else without .if") end
+  local lvl = condlevel
+  local res = condstack[lvl]
+  condstack[lvl] = "else"
+  if res then
+    if res == "else" then wfatal(".else after .else") end
+    stmtskip()
+  end
+end
+
+map_coreop[".endif_0"] = function(params)
+  local lvl = condlevel
+  if lvl == 0 then wfatal(".endif without .if") end
+  condlevel = lvl - 1
+end
+
+-- Check for unfinished conditionals.
+local function checkconds()
+  if g_errcount ~= "fatal" and condlevel ~= 0 then
+    wprinterr(g_fname, ":*: error: unbalanced conditional\n")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Search for a file in the given path and open it for reading.
+local function pathopen(path, name)
+  local dirsep = package and match(package.path, "\\") and "\\" or "/"
+  for _,p in ipairs(path) do
+    local fullname = p == "" and name or p..dirsep..name
+    local fin = io.open(fullname, "r")
+    if fin then
+      g_fname = fullname
+      return fin
+    end
+  end
+end
+
+-- Include a file.
+map_coreop[".include_1"] = function(params)
+  if not params then return "filename" end
+  local name = params[1]
+  -- Save state. Ugly, I know. but upvalues are fast.
+  local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
+  -- Read the included file.
+  local fatal = readfile(pathopen(g_opt.include, name) or
+			 wfatal("include file `"..name.."' not found"))
+  -- Restore state.
+  g_synclineno = -1
+  g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
+  if fatal then wfatal("in include file") end
+end
+
+-- Make .include and conditionals initially available, too.
+map_op[".include_1"] = map_coreop[".include_1"]
+map_op[".if_1"] = map_coreop[".if_1"]
+map_op[".elif_1"] = map_coreop[".elif_1"]
+map_op[".else_0"] = map_coreop[".else_0"]
+map_op[".endif_0"] = map_coreop[".endif_0"]
+
+------------------------------------------------------------------------------
+
+-- Support variables for macros.
+local mac_capture, mac_lineno, mac_name
+local mac_active = {}
+local mac_list = {}
+
+-- Pseudo-opcode to define a macro.
+map_coreop[".macro_*"] = function(mparams)
+  if not mparams then return "name [, params...]" end
+  -- Split off and validate macro name.
+  local name = remove(mparams, 1)
+  if not name then werror("missing macro name") end
+  if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]*$")) then
+    wfatal("bad macro name `"..name.."'")
+  end
+  -- Validate macro parameter names.
+  local mdup = {}
+  for _,mp in ipairs(mparams) do
+    if not match(mp, "^[%a_][%w_]*$") then
+      wfatal("bad macro parameter name `"..mp.."'")
+    end
+    if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
+    mdup[mp] = true
+  end
+  -- Check for duplicate or recursive macro definitions.
+  local opname = name.."_"..#mparams
+  if map_op[opname] or map_op[name.."_*"] then
+    wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
+  end
+  if mac_capture then wfatal("recursive macro definition") end
+
+  -- Enable statement capture.
+  local lines = {}
+  mac_lineno = g_lineno
+  mac_name = name
+  mac_capture = function(stmt) -- Statement capture function.
+    -- Stop macro definition with .endmacro pseudo-opcode.
+    if not match(stmt, "^%s*.endmacro%s*$") then
+      lines[#lines+1] = stmt
+      return
+    end
+    mac_capture = nil
+    mac_lineno = nil
+    mac_name = nil
+    mac_list[#mac_list+1] = opname
+    -- Add macro-op definition.
+    map_op[opname] = function(params)
+      if not params then return mparams, lines end
+      -- Protect against recursive macro invocation.
+      if mac_active[opname] then wfatal("recursive macro invocation") end
+      mac_active[opname] = true
+      -- Setup substitution map.
+      local subst = {}
+      for i,mp in ipairs(mparams) do subst[mp] = params[i] end
+      local mcom
+      if g_opt.maccomment and g_opt.comment then
+	mcom = " MACRO "..name.." ("..#mparams..")"
+	wcomment("{"..mcom)
+      end
+      -- Loop through all captured statements
+      for _,stmt in ipairs(lines) do
+	-- Substitute macro parameters.
+	local st = gsub(stmt, "[%w_]+", subst)
+	st = definesubst(st)
+	st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
+	if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
+	-- Emit statement. Use a protected call for better diagnostics.
+	local ok, err = pcall(dostmt, st)
+	if not ok then
+	  -- Add the captured statement to the error.
+	  wprinterr(err, "\n", g_indent, "|  ", stmt,
+		    "\t[MACRO ", name, " (", #mparams, ")]\n")
+	end
+      end
+      if mcom then wcomment("}"..mcom) end
+      mac_active[opname] = nil
+    end
+  end
+end
+
+-- An .endmacro pseudo-opcode outside of a macro definition is an error.
+map_coreop[".endmacro_0"] = function(params)
+  wfatal(".endmacro without .macro")
+end
+
+-- Dump all macros and their contents (with -PP only).
+local function dumpmacros(out, lvl)
+  sort(mac_list)
+  out:write("Macros:\n")
+  for _,opname in ipairs(mac_list) do
+    local name = sub(opname, 1, -3)
+    local params, lines = map_op[opname]()
+    out:write(format("  %-20s %s\n", name, concat(params, ", ")))
+    if lvl > 1 then
+      for _,line in ipairs(lines) do
+	out:write("  |", line, "\n")
+      end
+      out:write("\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished macro definitions.
+local function checkmacros()
+  if mac_capture then
+    wprinterr(g_fname, ":", mac_lineno,
+	      ": error: unfinished .macro `", mac_name ,"'\n")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for captures.
+local cap_lineno, cap_name
+local cap_buffers = {}
+local cap_used = {}
+
+-- Start a capture.
+map_coreop[".capture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..name.."'")
+  end
+  if cap_name then
+    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
+  end
+  cap_name = name
+  cap_lineno = g_lineno
+  -- Create or continue a capture buffer and start the output line capture.
+  local buf = cap_buffers[name]
+  if not buf then buf = {}; cap_buffers[name] = buf end
+  g_capbuffer = buf
+  g_synclineno = 0
+end
+
+-- Stop a capture.
+map_coreop[".endcapture_0"] = function(params)
+  wflush()
+  if not cap_name then wfatal(".endcapture without a valid .capture") end
+  cap_name = nil
+  cap_lineno = nil
+  g_capbuffer = nil
+  g_synclineno = 0
+end
+
+-- Dump a capture buffer.
+map_coreop[".dumpcapture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..name.."'")
+  end
+  cap_used[name] = true
+  wline(function(out)
+    local buf = cap_buffers[name]
+    if buf then wdumplines(out, buf) end
+  end)
+  g_synclineno = 0
+end
+
+-- Dump all captures and their buffers (with -PP only).
+local function dumpcaptures(out, lvl)
+  out:write("Captures:\n")
+  for name,buf in pairs(cap_buffers) do
+    out:write(format("  %-20s %4s)\n", name, "("..#buf))
+    if lvl > 1 then
+      local bar = rep("=", 76)
+      out:write("  ", bar, "\n")
+      for _,line in ipairs(buf) do
+	out:write("  ", line, "\n")
+      end
+      out:write("  ", bar, "\n\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished or unused captures.
+local function checkcaptures()
+  if cap_name then
+    wprinterr(g_fname, ":", cap_lineno,
+	      ": error: unfinished .capture `", cap_name,"'\n")
+    return
+  end
+  for name in pairs(cap_buffers) do
+    if not cap_used[name] then
+      wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Sections names.
+local map_sections = {}
+
+-- Pseudo-opcode to define code sections.
+-- TODO: Data sections, BSS sections. Needs extra C code and API.
+map_coreop[".section_*"] = function(params)
+  if not params then return "name..." end
+  if #map_sections > 0 then werror("duplicate section definition") end
+  wflush()
+  for sn,name in ipairs(params) do
+    local opname = "."..name.."_0"
+    if not match(name, "^[%a][%w_]*$") or
+       map_op[opname] or map_op["."..name.."_*"] then
+      werror("bad section name `"..name.."'")
+    end
+    map_sections[#map_sections+1] = name
+    wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1))
+    map_op[opname] = function(params) g_arch.section(sn-1) end
+  end
+  wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
+end
+
+-- Dump all sections.
+local function dumpsections(out, lvl)
+  out:write("Sections:\n")
+  for _,name in ipairs(map_sections) do
+    out:write(format("  %s\n", name))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Replacement for customized Lua, which lacks the package library.
+local prefix = ""
+if not require then
+  function require(name)
+    local fp = assert(io.open(prefix..name..".lua"))
+    local s = fp:read("*a")
+    assert(fp:close())
+    return assert(loadstring(s, "@"..name..".lua"))()
+  end
+end
+
+-- Load architecture-specific module.
+local function loadarch(arch)
+  if not match(arch, "^[%w_]+$") then return "bad arch name" end
+  local ok, m_arch = pcall(require, "dasm_"..arch)
+  if not ok then return "cannot load module: "..m_arch end
+  g_arch = m_arch
+  wflush = m_arch.passcb(wline, werror, wfatal, wwarn)
+  m_arch.setup(arch, g_opt)
+  map_op, map_def = m_arch.mergemaps(map_coreop, map_def)
+end
+
+-- Dump architecture description.
+function opt_map.dumparch(args)
+  local name = optparam(args)
+  if not g_arch then
+    local err = loadarch(name)
+    if err then opterror(err) end
+  end
+
+  local t = {}
+  for name in pairs(map_coreop) do t[#t+1] = name end
+  for name in pairs(map_op) do t[#t+1] = name end
+  sort(t)
+
+  local out = stdout
+  local _arch = g_arch._info
+  out:write(format("%s version %s, released %s, %s\n",
+    _info.name, _info.version, _info.release, _info.url))
+  g_arch.dumparch(out)
+
+  local pseudo = true
+  out:write("Pseudo-Opcodes:\n")
+  for _,sname in ipairs(t) do
+    local name, nparam = match(sname, "^(.+)_([0-9%*])$")
+    if name then
+      if pseudo and sub(name, 1, 1) ~= "." then
+	out:write("\nOpcodes:\n")
+	pseudo = false
+      end
+      local f = map_op[sname]
+      local s
+      if nparam ~= "*" then nparam = nparam + 0 end
+      if nparam == 0 then
+	s = ""
+      elseif type(f) == "string" then
+	s = map_op[".template__"](nil, f, nparam)
+      else
+	s = f(nil, nparam)
+      end
+      if type(s) == "table" then
+	for _,s2 in ipairs(s) do
+	  out:write(format("  %-12s %s\n", name, s2))
+	end
+      else
+	out:write(format("  %-12s %s\n", name, s))
+      end
+    end
+  end
+  out:write("\n")
+  exit(0)
+end
+
+-- Pseudo-opcode to set the architecture.
+-- Only initially available (map_op is replaced when called).
+map_op[".arch_1"] = function(params)
+  if not params then return "name" end
+  local err = loadarch(params[1])
+  if err then wfatal(err) end
+  wline(format("#if DASM_VERSION != %d", _info.vernum))
+  wline('#error "Version mismatch between DynASM and included encoding engine"')
+  wline("#endif")
+end
+
+-- Dummy .arch pseudo-opcode to improve the error report.
+map_coreop[".arch_1"] = function(params)
+  if not params then return "name" end
+  wfatal("duplicate .arch statement")
+end
+
+------------------------------------------------------------------------------
+
+-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'.
+map_coreop[".nop_*"] = function(params)
+  if not params then return "[ignored...]" end
+end
+
+-- Pseudo-opcodes to raise errors.
+map_coreop[".error_1"] = function(params)
+  if not params then return "message" end
+  werror(params[1])
+end
+
+map_coreop[".fatal_1"] = function(params)
+  if not params then return "message" end
+  wfatal(params[1])
+end
+
+-- Dump all user defined elements.
+local function dumpdef(out)
+  local lvl = g_opt.dumpdef
+  if lvl == 0 then return end
+  dumpsections(out, lvl)
+  dumpdefines(out, lvl)
+  if g_arch then g_arch.dumpdef(out, lvl) end
+  dumpmacros(out, lvl)
+  dumpcaptures(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Helper for splitstmt.
+local splitlvl
+
+local function splitstmt_one(c)
+  if c == "(" then
+    splitlvl = ")"..splitlvl
+  elseif c == "[" then
+    splitlvl = "]"..splitlvl
+  elseif c == "{" then
+    splitlvl = "}"..splitlvl
+  elseif c == ")" or c == "]" or c == "}" then
+    if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end
+    splitlvl = sub(splitlvl, 2)
+  elseif splitlvl == "" then
+    return " \0 "
+  end
+  return c
+end
+
+-- Split statement into (pseudo-)opcode and params.
+local function splitstmt(stmt)
+  -- Convert label with trailing-colon into .label statement.
+  local label = match(stmt, "^%s*(.+):%s*$")
+  if label then return ".label", {label} end
+
+  -- Split at commas and equal signs, but obey parentheses and brackets.
+  splitlvl = ""
+  stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one)
+  if splitlvl ~= "" then werror("unbalanced () or []") end
+
+  -- Split off opcode.
+  local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
+  if not op then werror("bad statement syntax") end
+
+  -- Split parameters.
+  local params = {}
+  for p in gmatch(other, "%s*(%Z+)%z?") do
+    params[#params+1] = gsub(p, "%s+$", "")
+  end
+  if #params > 16 then werror("too many parameters") end
+
+  params.op = op
+  return op, params
+end
+
+-- Process a single statement.
+dostmt = function(stmt)
+  -- Ignore empty statements.
+  if match(stmt, "^%s*$") then return end
+
+  -- Capture macro defs before substitution.
+  if mac_capture then return mac_capture(stmt) end
+  stmt = definesubst(stmt)
+
+  -- Emit C code without parsing the line.
+  if sub(stmt, 1, 1) == "|" then
+    local tail = sub(stmt, 2)
+    wflush()
+    if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end
+    return
+  end
+
+  -- Split into (pseudo-)opcode and params.
+  local op, params = splitstmt(stmt)
+
+  -- Get opcode handler (matching # of parameters or generic handler).
+  local f = map_op[op.."_"..#params] or map_op[op.."_*"]
+  if not f then
+    if not g_arch then wfatal("first statement must be .arch") end
+    -- Improve error report.
+    for i=0,9 do
+      if map_op[op.."_"..i] then
+	werror("wrong number of parameters for `"..op.."'")
+      end
+    end
+    werror("unknown statement `"..op.."'")
+  end
+
+  -- Call opcode handler or special handler for template strings.
+  if type(f) == "string" then
+    map_op[".template__"](params, f)
+  else
+    f(params)
+  end
+end
+
+-- Process a single line.
+local function doline(line)
+  if g_opt.flushline then wflush() end
+
+  -- Assembler line?
+  local indent, aline = match(line, "^(%s*)%|(.*)$")
+  if not aline then
+    -- No, plain C code line, need to flush first.
+    wflush()
+    wsync()
+    wline(line, false)
+    return
+  end
+
+  g_indent = indent -- Remember current line indentation.
+
+  -- Emit C code (even from macros). Avoids echo and line parsing.
+  if sub(aline, 1, 1) == "|" then
+    if not mac_capture then
+      wsync()
+    elseif g_opt.comment then
+      wsync()
+      wcomment(aline)
+    end
+    dostmt(aline)
+    return
+  end
+
+  -- Echo assembler line as a comment.
+  if g_opt.comment then
+    wsync()
+    wcomment(aline)
+  end
+
+  -- Strip assembler comments.
+  aline = gsub(aline, "//.*$", "")
+
+  -- Split line into statements at semicolons.
+  if match(aline, ";") then
+    for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
+  else
+    dostmt(aline)
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Write DynASM header.
+local function dasmhead(out)
+  out:write(format([[
+/*
+** This file has been pre-processed with DynASM.
+** %s
+** DynASM version %s, DynASM %s version %s
+** DO NOT EDIT! The original file is in "%s".
+*/
+
+]], _info.url,
+    _info.version, g_arch._info.arch, g_arch._info.version,
+    g_fname))
+end
+
+-- Read input file.
+readfile = function(fin)
+  g_indent = ""
+  g_lineno = 0
+  g_synclineno = -1
+
+  -- Process all lines.
+  for line in fin:lines() do
+    g_lineno = g_lineno + 1
+    g_curline = line
+    local ok, err = pcall(doline, line)
+    if not ok and wprinterr(err, "\n") then return true end
+  end
+  wflush()
+
+  -- Close input file.
+  assert(fin == stdin or fin:close())
+end
+
+-- Write output file.
+local function writefile(outfile)
+  local fout
+
+  -- Open output file.
+  if outfile == nil or outfile == "-" then
+    fout = stdout
+  else
+    fout = assert(io.open(outfile, "w"))
+  end
+
+  -- Write all buffered lines
+  wdumplines(fout, g_wbuffer)
+
+  -- Close output file.
+  assert(fout == stdout or fout:close())
+
+  -- Optionally dump definitions.
+  dumpdef(fout == stdout and stderr or stdout)
+end
+
+-- Translate an input file to an output file.
+local function translate(infile, outfile)
+  g_wbuffer = {}
+  g_indent = ""
+  g_lineno = 0
+  g_synclineno = -1
+
+  -- Put header.
+  wline(dasmhead)
+
+  -- Read input file.
+  local fin
+  if infile == "-" then
+    g_fname = "(stdin)"
+    fin = stdin
+  else
+    g_fname = infile
+    fin = assert(io.open(infile, "r"))
+  end
+  readfile(fin)
+
+  -- Check for errors.
+  if not g_arch then
+    wprinterr(g_fname, ":*: error: missing .arch directive\n")
+  end
+  checkconds()
+  checkmacros()
+  checkcaptures()
+
+  if g_errcount ~= 0 then
+    stderr:write(g_fname, ":*: info: ", g_errcount, " error",
+      (type(g_errcount) == "number" and g_errcount > 1) and "s" or "",
+      " in input file -- no output file generated.\n")
+    dumpdef(stderr)
+    exit(1)
+  end
+
+  -- Write output file.
+  writefile(outfile)
+end
+
+------------------------------------------------------------------------------
+
+-- Print help text.
+function opt_map.help()
+  stdout:write("DynASM -- ", _info.description, ".\n")
+  stdout:write("DynASM ", _info.version, " ", _info.release, "  ", _info.url, "\n")
+  stdout:write[[
+
+Usage: dynasm [OPTION]... INFILE.dasc|-
+
+  -h, --help           Display this help text.
+  -V, --version        Display version and copyright information.
+
+  -o, --outfile FILE   Output file name (default is stdout).
+  -I, --include DIR    Add directory to the include search path.
+
+  -c, --ccomment       Use /* */ comments for assembler lines.
+  -C, --cppcomment     Use // comments for assembler lines (default).
+  -N, --nocomment      Suppress assembler lines in output.
+  -M, --maccomment     Show macro expansions as comments (default off).
+
+  -L, --nolineno       Suppress CPP line number information in output.
+  -F, --flushline      Flush action list for every line.
+
+  -D NAME[=SUBST]      Define a substitution.
+  -U NAME              Undefine a substitution.
+
+  -P, --dumpdef        Dump defines, macros, etc. Repeat for more output.
+  -A, --dumparch ARCH  Load architecture ARCH and dump description.
+]]
+  exit(0)
+end
+
+-- Print version information.
+function opt_map.version()
+  stdout:write(format("%s version %s, released %s\n%s\n\n%s",
+    _info.name, _info.version, _info.release, _info.url, _info.copyright))
+  exit(0)
+end
+
+-- Misc. options.
+function opt_map.outfile(args) g_opt.outfile = optparam(args) end
+function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end
+function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end
+function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end
+function opt_map.nocomment() g_opt.comment = false end
+function opt_map.maccomment() g_opt.maccomment = true end
+function opt_map.nolineno() g_opt.cpp = false end
+function opt_map.flushline() g_opt.flushline = true end
+function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end
+
+------------------------------------------------------------------------------
+
+-- Short aliases for long options.
+local opt_alias = {
+  h = "help", ["?"] = "help", V = "version",
+  o = "outfile", I = "include",
+  c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment",
+  L = "nolineno", F = "flushline",
+  P = "dumpdef", A = "dumparch",
+}
+
+-- Parse single option.
+local function parseopt(opt, args)
+  opt_current = #opt == 1 and "-"..opt or "--"..opt
+  local f = opt_map[opt] or opt_map[opt_alias[opt]]
+  if not f then
+    opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
+  end
+  f(args)
+end
+
+-- Parse arguments.
+local function parseargs(args)
+  -- Default options.
+  g_opt.comment = "//|"
+  g_opt.endcomment = ""
+  g_opt.cpp = true
+  g_opt.dumpdef = 0
+  g_opt.include = { "" }
+
+  -- Process all option arguments.
+  args.argn = 1
+  repeat
+    local a = args[args.argn]
+    if not a then break end
+    local lopt, opt = match(a, "^%-(%-?)(.+)")
+    if not opt then break end
+    args.argn = args.argn + 1
+    if lopt == "" then
+      -- Loop through short options.
+      for o in gmatch(opt, ".") do parseopt(o, args) end
+    else
+      -- Long option.
+      parseopt(opt, args)
+    end
+  until false
+
+  -- Check for proper number of arguments.
+  local nargs = #args - args.argn + 1
+  if nargs ~= 1 then
+    if nargs == 0 then
+      if g_opt.dumpdef > 0 then return dumpdef(stdout) end
+    end
+    opt_map.help()
+  end
+
+  -- Translate a single input file to a single output file
+  -- TODO: Handle multiple files?
+  translate(args[args.argn], g_opt.outfile)
+end
+
+------------------------------------------------------------------------------
+
+-- Add the directory dynasm.lua resides in to the Lua module search path.
+local arg = arg
+if arg and arg[0] then
+  prefix = match(arg[0], "^(.*[/\\])")
+  if package and prefix then package.path = prefix.."?.lua;"..package.path end
+end
+
+-- Start DynASM.
+parseargs{...}
+
+------------------------------------------------------------------------------
+

+ 88 - 0
luajit.mod/luajit/etc/luajit.1

@@ -0,0 +1,88 @@
+.TH luajit 1 "" "" "LuaJIT documentation"
+.SH NAME
+luajit \- Just-In-Time Compiler for the Lua Language
+\fB
+.SH SYNOPSIS
+.B luajit
+[\fIoptions\fR]... [\fIscript\fR [\fIargs\fR]...]
+.SH "WEB SITE"
+.IR http://luajit.org
+.SH DESCRIPTION
+.PP
+This is the command-line program to run Lua programs with \fBLuaJIT\fR.
+.PP
+\fBLuaJIT\fR is a just-in-time (JIT) compiler for the Lua language.
+The virtual machine (VM) is based on a fast interpreter combined with
+a trace compiler. It can significantly improve the performance of Lua programs.
+.PP
+\fBLuaJIT\fR is API\- and ABI-compatible with the VM of the standard
+Lua\ 5.1 interpreter. When embedding the VM into an application,
+the built library can be used as a drop-in replacement.
+.SH OPTIONS
+.TP
+.BI "\-e " chunk
+Run the given chunk of Lua code.
+.TP
+.BI "\-l " library
+Load the named library, just like \fBrequire("\fR\fIlibrary\fR\fB")\fR.
+.TP
+.BI "\-b " ...
+Save or list bytecode. Run without arguments to get help on options.
+.TP
+.BI "\-j " command
+Perform LuaJIT control command (optional space after \fB\-j\fR).
+.TP
+.BI "\-O" [opt]
+Control LuaJIT optimizations.
+.TP
+.B "\-i"
+Run in interactive mode.
+.TP
+.B "\-v"
+Show \fBLuaJIT\fR version.
+.TP
+.B "\-E"
+Ignore environment variables.
+.TP
+.B "\-\-"
+Stop processing options.
+.TP
+.B "\-"
+Read script from stdin instead.
+.PP
+After all options are processed, the given \fIscript\fR is run.
+The arguments are passed in the global \fIarg\fR table.
+.PP
+Interactive mode is only entered, if no \fIscript\fR and no \fB\-e\fR
+option is given. Interactive mode can be left with EOF (\fICtrl\-Z\fB).
+.SH EXAMPLES
+.TP
+luajit hello.lua world
+
+Prints "Hello world", assuming \fIhello.lua\fR contains:
+.br
+  print("Hello", arg[1])
+.TP
+luajit \-e "local x=0; for i=1,1e9 do x=x+i end; print(x)"
+
+Calculates the sum of the numbers from 1 to 1000000000.
+.br
+And finishes in a reasonable amount of time, too.
+.TP
+luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
+
+Runs some nested loops and shows the resulting traces.
+.SH COPYRIGHT
+.PP
+\fBLuaJIT\fR is Copyright \(co 2005-2015 Mike Pall.
+.br
+\fBLuaJIT\fR is open source software, released under the MIT license.
+.SH SEE ALSO
+.PP
+More details in the provided HTML docs or at:
+.IR http://luajit.org
+.br
+More about the Lua language can be found at:
+.IR http://lua.org/docs.html
+.PP
+lua(1)

+ 25 - 0
luajit.mod/luajit/etc/luajit.pc

@@ -0,0 +1,25 @@
+# Package information for LuaJIT to be used by pkg-config.
+majver=2
+minver=0
+relver=4
+version=${majver}.${minver}.${relver}
+abiver=5.1
+
+prefix=/usr/local
+multilib=lib
+exec_prefix=${prefix}
+libdir=${exec_prefix}/${multilib}
+libname=luajit-${abiver}
+includedir=${prefix}/include/luajit-${majver}.${minver}
+
+INSTALL_LMOD=${prefix}/share/lua/${abiver}
+INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver}
+
+Name: LuaJIT
+Description: Just-in-time compiler for Lua
+URL: http://luajit.org
+Version: ${version}
+Requires:
+Libs: -L${libdir} -l${libname}
+Libs.private: -Wl,-E -lm -ldl
+Cflags: -I${includedir}

+ 684 - 0
luajit.mod/luajit/src/Makefile

@@ -0,0 +1,684 @@
+##############################################################################
+# LuaJIT Makefile. Requires GNU Make.
+#
+# Please read doc/install.html before changing any variables!
+#
+# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
+# Also works with MinGW and Cygwin on Windows.
+# Please check msvcbuild.bat for building with MSVC on Windows.
+#
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+##############################################################################
+
+MAJVER=  2
+MINVER=  0
+RELVER=  4
+ABIVER=  5.1
+NODOTABIVER= 51
+
+##############################################################################
+#############################  COMPILER OPTIONS  #############################
+##############################################################################
+# These options mainly affect the speed of the JIT compiler itself, not the
+# speed of the JIT-compiled code. Turn any of the optional settings on by
+# removing the '#' in front of them. Make sure you force a full recompile
+# with "make clean", followed by "make" if you change any options.
+#
+# LuaJIT builds as a native 32 or 64 bit binary by default.
+CC= gcc
+#
+# Use this if you want to force a 32 bit build on a 64 bit multilib OS.
+#CC= gcc -m32
+#
+# Since the assembler part does NOT maintain a frame pointer, it's pointless
+# to slow down the C part by not omitting it. Debugging, tracebacks and
+# unwinding are not affected -- the assembler part has frame unwind
+# information and GCC emits it where needed (x64) or with -g (see CCDEBUG).
+CCOPT= -O2 -fomit-frame-pointer
+# Use this if you want to generate a smaller binary (but it's slower):
+#CCOPT= -Os -fomit-frame-pointer
+# Note: it's no longer recommended to use -O3 with GCC 4.x.
+# The I-Cache bloat usually outweighs the benefits from aggressive inlining.
+#
+# Target-specific compiler options:
+#
+# x86 only: it's recommended to compile at least for i686. Better yet,
+# compile for an architecture that has SSE2, too (-msse -msse2).
+#
+# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
+# the binaries to a different machine you could also use: -march=native
+#
+CCOPT_x86= -march=i686
+CCOPT_x64=
+CCOPT_arm=
+CCOPT_ppc=
+CCOPT_ppcspe=
+CCOPT_mips=
+#
+CCDEBUG=
+# Uncomment the next line to generate debug information:
+#CCDEBUG= -g
+#
+CCWARN= -Wall
+# Uncomment the next line to enable more warnings:
+#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith
+#
+##############################################################################
+
+##############################################################################
+################################  BUILD MODE  ################################
+##############################################################################
+# The default build mode is mixed mode on POSIX. On Windows this is the same
+# as dynamic mode.
+#
+# Mixed mode creates a static + dynamic library and a statically linked luajit.
+BUILDMODE= mixed
+#
+# Static mode creates a static library and a statically linked luajit.
+#BUILDMODE= static
+#
+# Dynamic mode creates a dynamic library and a dynamically linked luajit.
+# Note: this executable will only run when the library is installed!
+#BUILDMODE= dynamic
+#
+##############################################################################
+
+##############################################################################
+#################################  FEATURES  #################################
+##############################################################################
+# Enable/disable these features as needed, but make sure you force a full
+# recompile with "make clean", followed by "make".
+XCFLAGS=
+#
+# Permanently disable the FFI extension to reduce the size of the LuaJIT
+# executable. But please consider that the FFI library is compiled-in,
+# but NOT loaded by default. It only allocates any memory, if you actually
+# make use of it.
+#XCFLAGS+= -DLUAJIT_DISABLE_FFI
+#
+# Features from Lua 5.2 that are unlikely to break existing code are
+# enabled by default. Some other features that *might* break some existing
+# code (e.g. __pairs or os.execute() return values) can be enabled here.
+# Note: this does not provide full compatibility with Lua 5.2 at this time.
+#XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT
+#
+# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter.
+#XCFLAGS+= -DLUAJIT_DISABLE_JIT
+#
+# Some architectures (e.g. PPC) can use either single-number (1) or
+# dual-number (2) mode. Uncomment one of these lines to override the
+# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details.
+#XCFLAGS+= -DLUAJIT_NUMMODE=1
+#XCFLAGS+= -DLUAJIT_NUMMODE=2
+#
+##############################################################################
+
+##############################################################################
+############################  DEBUGGING SUPPORT  #############################
+##############################################################################
+# Enable these options as needed, but make sure you force a full recompile
+# with "make clean", followed by "make".
+# Note that most of these are NOT suitable for benchmarking or release mode!
+#
+# Use the system provided memory allocator (realloc) instead of the
+# bundled memory allocator. This is slower, but sometimes helpful for
+# debugging. This option cannot be enabled on x64, since realloc usually
+# doesn't return addresses in the right address range.
+# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
+# the only way to get useful results from it for all other architectures.
+#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
+#
+# This define is required to run LuaJIT under Valgrind. The Valgrind
+# header files must be installed. You should enable debug information, too.
+# Use --suppressions=lj.supp to avoid some false positives.
+#XCFLAGS+= -DLUAJIT_USE_VALGRIND
+#
+# This is the client for the GDB JIT API. GDB 7.0 or higher is required
+# to make use of it. See lj_gdbjit.c for details. Enabling this causes
+# a non-negligible overhead, even when not running under GDB.
+#XCFLAGS+= -DLUAJIT_USE_GDBJIT
+#
+# Turn on assertions for the Lua/C API to debug problems with lua_* calls.
+# This is rather slow -- use only while developing C libraries/embeddings.
+#XCFLAGS+= -DLUA_USE_APICHECK
+#
+# Turn on assertions for the whole LuaJIT VM. This significantly slows down
+# everything. Use only if you suspect a problem with LuaJIT itself.
+#XCFLAGS+= -DLUA_USE_ASSERT
+#
+##############################################################################
+# You probably don't need to change anything below this line!
+##############################################################################
+
+##############################################################################
+# Flags and options for host and target.
+##############################################################################
+
+# You can override the following variables at the make command line:
+#   CC       HOST_CC       STATIC_CC       DYNAMIC_CC
+#   CFLAGS   HOST_CFLAGS   TARGET_CFLAGS
+#   LDFLAGS  HOST_LDFLAGS  TARGET_LDFLAGS  TARGET_SHLDFLAGS
+#   LIBS     HOST_LIBS     TARGET_LIBS
+#   CROSS    HOST_SYS      TARGET_SYS      TARGET_FLAGS
+#
+# Cross-compilation examples:
+#   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
+#   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
+
+CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
+
+HOST_CC= $(CC)
+HOST_RM= rm -f
+# If left blank, minilua is built and used. You can supply an installed
+# copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
+HOST_LUA=
+
+HOST_XCFLAGS= -I.
+HOST_XLDFLAGS=
+HOST_XLIBS=
+HOST_ACFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) $(HOST_CFLAGS)
+HOST_ALDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) $(HOST_LDFLAGS)
+HOST_ALIBS= $(HOST_XLIBS) $(LIBS) $(HOST_LIBS)
+
+STATIC_CC = $(CROSS)$(CC)
+DYNAMIC_CC = $(CROSS)$(CC) -fPIC
+TARGET_CC= $(STATIC_CC)
+TARGET_STCC= $(STATIC_CC)
+TARGET_DYNCC= $(DYNAMIC_CC)
+TARGET_LD= $(CROSS)$(CC)
+TARGET_AR= $(CROSS)ar rcus
+TARGET_STRIP= $(CROSS)strip
+
+TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
+TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
+TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
+TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
+TARGET_DLLNAME= lua$(NODOTABIVER).dll
+TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
+TARGET_DYNXLDOPTS=
+
+TARGET_LFSFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+TARGET_XCFLAGS= $(TARGET_LFSFLAGS) -U_FORTIFY_SOURCE
+TARGET_XLDFLAGS=
+TARGET_XLIBS= -lm
+TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
+TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
+TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
+
+TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
+ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= x64
+else
+ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= x86
+else
+ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= arm
+else
+ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= ppc
+else
+ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= ppcspe
+else
+ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -D__MIPSEL__=1
+  endif
+  TARGET_LJARCH= mips
+else
+  $(error Unsupported target architecture)
+endif
+endif
+endif
+endif
+endif
+endif
+
+ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
+  TARGET_SYS= PS3
+  TARGET_ARCH+= -D__CELLOS_LV2__
+  TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
+endif
+ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+endif
+
+TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
+TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH))
+
+ifneq (,$(PREFIX))
+ifneq (/usr/local,$(PREFIX))
+  TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\"
+  ifneq (/usr,$(PREFIX))
+    TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH)
+  endif
+endif
+endif
+ifneq (,$(MULTILIB))
+  TARGET_XCFLAGS+= -DLUA_MULTILIB=\"$(MULTILIB)\"
+endif
+ifneq (,$(LMULTILIB))
+  TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
+endif
+
+##############################################################################
+# System detection.
+##############################################################################
+
+ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
+  HOST_SYS= Windows
+  HOST_RM= del
+else
+  HOST_SYS:= $(shell uname -s)
+  ifneq (,$(findstring MINGW,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= mingw
+  endif
+  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= cygwin
+  endif
+endif
+
+TARGET_SYS?= $(HOST_SYS)
+ifeq (Windows,$(TARGET_SYS))
+  TARGET_STRIP+= --strip-unneeded
+  TARGET_XSHLDFLAGS= -shared
+  TARGET_DYNXLDOPTS=
+else
+ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
+  TARGET_XCFLAGS+= -fno-stack-protector
+endif
+ifeq (Darwin,$(TARGET_SYS))
+  ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
+    export MACOSX_DEPLOYMENT_TARGET=10.4
+  endif
+  TARGET_STRIP+= -x
+  TARGET_AR+= 2>/dev/null
+  TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
+  TARGET_DYNXLDOPTS=
+  TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+  ifeq (x64,$(TARGET_LJARCH))
+    TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
+    TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
+  endif
+else
+ifeq (iOS,$(TARGET_SYS))
+  TARGET_STRIP+= -x
+  TARGET_AR+= 2>/dev/null
+  TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
+  TARGET_DYNXLDOPTS=
+  TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+else
+  ifneq (SunOS,$(TARGET_SYS))
+    ifneq (PS3,$(TARGET_SYS))
+      TARGET_XLDFLAGS+= -Wl,-E
+    endif
+  endif
+  ifeq (Linux,$(TARGET_SYS))
+    TARGET_XLIBS+= -ldl
+  endif
+  ifeq (GNU/kFreeBSD,$(TARGET_SYS))
+    TARGET_XLIBS+= -ldl
+  endif
+endif
+endif
+endif
+
+ifneq ($(HOST_SYS),$(TARGET_SYS))
+  ifeq (Windows,$(TARGET_SYS))
+    HOST_XCFLAGS+= -malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS
+  else
+  ifeq (Linux,$(TARGET_SYS))
+    HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX
+  else
+  ifeq (Darwin,$(TARGET_SYS))
+    HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
+  else
+  ifeq (iOS,$(TARGET_SYS))
+    HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
+  else
+    HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
+  endif
+  endif
+  endif
+  endif
+endif
+
+ifneq (,$(CCDEBUG))
+  TARGET_STRIP= @:
+endif
+
+##############################################################################
+# Files and pathnames.
+##############################################################################
+
+MINILUA_O= host/minilua.o
+MINILUA_LIBS= -lm
+MINILUA_T= host/minilua
+MINILUA_X= $(MINILUA_T)
+
+ifeq (,$(HOST_LUA))
+  HOST_LUA= $(MINILUA_X)
+  DASM_DEP= $(MINILUA_T)
+endif
+
+DASM_DIR= ../dynasm
+DASM= $(HOST_LUA) $(DASM_DIR)/dynasm.lua
+DASM_XFLAGS=
+DASM_AFLAGS=
+DASM_ARCH= $(TARGET_LJARCH)
+
+ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D P64
+endif
+ifneq (,$(findstring LJ_HASJIT 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D JIT
+endif
+ifneq (,$(findstring LJ_HASFFI 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D FFI
+endif
+ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D DUALNUM
+endif
+ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D FPU
+  TARGET_ARCH+= -DLJ_ARCH_HASFPU=1
+else
+  TARGET_ARCH+= -DLJ_ARCH_HASFPU=0
+endif
+ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D HFABI
+  TARGET_ARCH+= -DLJ_ABI_SOFTFP=0
+else
+  TARGET_ARCH+= -DLJ_ABI_SOFTFP=1
+endif
+DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
+ifeq (Windows,$(TARGET_SYS))
+  DASM_AFLAGS+= -D WIN
+endif
+ifeq (x86,$(TARGET_LJARCH))
+  ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
+    DASM_AFLAGS+= -D SSE
+  endif
+else
+ifeq (x64,$(TARGET_LJARCH))
+  DASM_ARCH= x86
+else
+ifeq (arm,$(TARGET_LJARCH))
+  ifeq (iOS,$(TARGET_SYS))
+    DASM_AFLAGS+= -D IOS
+  endif
+else
+ifeq (ppc,$(TARGET_LJARCH))
+  ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
+    DASM_AFLAGS+= -D SQRT
+  endif
+  ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
+    DASM_AFLAGS+= -D ROUND
+  endif
+  ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+    DASM_AFLAGS+= -D GPR64
+  endif
+  ifeq (PS3,$(TARGET_SYS))
+    DASM_AFLAGS+= -D PPE -D TOC
+  endif
+endif
+endif
+endif
+endif
+
+DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
+DASM_DASC= vm_$(DASM_ARCH).dasc
+
+BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
+	   host/buildvm_lib.o host/buildvm_fold.o
+BUILDVM_T= host/buildvm
+BUILDVM_X= $(BUILDVM_T)
+
+HOST_O= $(MINILUA_O) $(BUILDVM_O)
+HOST_T= $(MINILUA_T) $(BUILDVM_T)
+
+LJVM_S= lj_vm.s
+LJVM_O= lj_vm.o
+LJVM_BOUT= $(LJVM_S)
+LJVM_MODE= elfasm
+
+LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
+	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
+LJLIB_C= $(LJLIB_O:.o=.c)
+
+LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
+	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
+	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
+	  lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
+	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
+	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
+	  lj_asm.o lj_trace.o lj_gdbjit.o \
+	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
+	  lj_carith.o lj_clib.o lj_cparse.o \
+	  lj_lib.o lj_alloc.o lib_aux.o \
+	  $(LJLIB_O) lib_init.o
+
+LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
+LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
+
+LIB_VMDEF= jit/vmdef.lua
+LIB_VMDEFP= $(LIB_VMDEF)
+
+LUAJIT_O= luajit.o
+LUAJIT_A= libluajit.a
+LUAJIT_SO= libluajit.so
+LUAJIT_T= luajit
+
+ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T)
+ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \
+	    host/buildvm_arch.h
+ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP)
+WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk
+ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
+
+##############################################################################
+# Build mode handling.
+##############################################################################
+
+# Mixed mode defaults.
+TARGET_O= $(LUAJIT_A)
+TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
+TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
+
+ifeq (Windows,$(TARGET_SYS))
+  TARGET_DYNCC= $(STATIC_CC)
+  LJVM_MODE= peobj
+  LJVM_BOUT= $(LJVM_O)
+  LUAJIT_T= luajit.exe
+  ifeq (cygwin,$(HOST_MSYS))
+    LUAJIT_SO= cyg$(TARGET_DLLNAME)
+  else
+    LUAJIT_SO= $(TARGET_DLLNAME)
+  endif
+  # Mixed mode is not supported on Windows. And static mode doesn't work well.
+  # C modules cannot be loaded, because they bind to lua51.dll.
+  ifneq (static,$(BUILDMODE))
+    BUILDMODE= dynamic
+    TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL
+  endif
+endif
+ifeq (Darwin,$(TARGET_SYS))
+  LJVM_MODE= machasm
+endif
+ifeq (iOS,$(TARGET_SYS))
+  LJVM_MODE= machasm
+endif
+ifeq (SunOS,$(TARGET_SYS))
+  BUILDMODE= static
+endif
+ifeq (PS3,$(TARGET_SYS))
+  BUILDMODE= static
+endif
+
+ifeq (Windows,$(HOST_SYS))
+  MINILUA_T= host/minilua.exe
+  BUILDVM_T= host/buildvm.exe
+  ifeq (,$(HOST_MSYS))
+    MINILUA_X= host\minilua
+    BUILDVM_X= host\buildvm
+    ALL_RM:= $(subst /,\,$(ALL_RM))
+  endif
+endif
+
+ifeq (static,$(BUILDMODE))
+  TARGET_DYNCC= @:
+  TARGET_T= $(LUAJIT_T)
+  TARGET_DEP= $(LIB_VMDEF)
+else
+ifeq (dynamic,$(BUILDMODE))
+  ifneq (Windows,$(TARGET_SYS))
+    TARGET_CC= $(DYNAMIC_CC)
+  endif
+  TARGET_DYNCC= @:
+  LJVMCORE_DYNO= $(LJVMCORE_O)
+  TARGET_O= $(LUAJIT_SO)
+  TARGET_XLDFLAGS+= $(TARGET_DYNXLDOPTS)
+else
+ifeq (Darwin,$(TARGET_SYS))
+  TARGET_DYNCC= @:
+  LJVMCORE_DYNO= $(LJVMCORE_O)
+endif
+ifeq (iOS,$(TARGET_SYS))
+  TARGET_DYNCC= @:
+  LJVMCORE_DYNO= $(LJVMCORE_O)
+endif
+endif
+endif
+
+Q= @
+E= @echo
+#Q=
+#E= @:
+
+##############################################################################
+# Make targets.
+##############################################################################
+
+default all:	$(TARGET_T)
+
+amalg:
+	@grep "^[+|]" ljamalg.c
+	$(MAKE) all "LJCORE_O=ljamalg.o"
+
+clean:
+	$(HOST_RM) $(ALL_RM)
+
+depend:
+	@for file in $(ALL_HDRGEN); do \
+	  test -f $$file || touch $$file; \
+	  done
+	@$(HOST_CC) $(HOST_ACFLAGS) -MM *.c host/*.c | \
+	  sed -e "s| [^ ]*/dasm_\S*\.h||g" \
+	      -e "s|^\([^l ]\)|host/\1|" \
+	      -e "s| lj_target_\S*\.h| lj_target_*.h|g" \
+	      -e "s| lj_emit_\S*\.h| lj_emit_*.h|g" \
+	      -e "s| lj_asm_\S*\.h| lj_asm_*.h|g" >Makefile.dep
+	@for file in $(ALL_HDRGEN); do \
+	  test -s $$file || $(HOST_RM) $$file; \
+	  done
+
+.PHONY: default all amalg clean depend
+
+##############################################################################
+# Rules for generated files.
+##############################################################################
+
+$(MINILUA_T): $(MINILUA_O)
+	$(E) "HOSTLINK  $@"
+	$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
+
+host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP)
+	$(E) "DYNASM    $@"
+	$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
+
+host/buildvm.o: $(DASM_DIR)/dasm_*.h
+
+$(BUILDVM_T): $(BUILDVM_O)
+	$(E) "HOSTLINK  $@"
+	$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(BUILDVM_O) $(HOST_ALIBS)
+
+$(LJVM_BOUT): $(BUILDVM_T)
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m $(LJVM_MODE) -o $@
+
+lj_bcdef.h: $(BUILDVM_T) $(LJLIB_C)
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m bcdef -o $@ $(LJLIB_C)
+
+lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C)
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m ffdef -o $@ $(LJLIB_C)
+
+lj_libdef.h: $(BUILDVM_T) $(LJLIB_C)
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m libdef -o $@ $(LJLIB_C)
+
+lj_recdef.h: $(BUILDVM_T) $(LJLIB_C)
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m recdef -o $@ $(LJLIB_C)
+
+$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C)
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m vmdef -o $(LIB_VMDEFP) $(LJLIB_C)
+
+lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
+	$(E) "BUILDVM   $@"
+	$(Q)$(BUILDVM_X) -m folddef -o $@ lj_opt_fold.c
+
+##############################################################################
+# Object file rules.
+##############################################################################
+
+%.o: %.c
+	$(E) "CC        $@"
+	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
+	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+
+%.o: %.s
+	$(E) "ASM       $@"
+	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
+	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+
+$(LUAJIT_O):
+	$(E) "CC        $@"
+	$(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ $<
+
+$(HOST_O): %.o: %.c
+	$(E) "HOSTCC    $@"
+	$(Q)$(HOST_CC) $(HOST_ACFLAGS) -c -o $@ $<
+
+include Makefile.dep
+
+##############################################################################
+# Target file rules.
+##############################################################################
+
+$(LUAJIT_A): $(LJVMCORE_O)
+	$(E) "AR        $@"
+	$(Q)$(TARGET_AR) $@ $(LJVMCORE_O)
+
+# The dependency on _O, but linking with _DYNO is intentional.
+$(LUAJIT_SO): $(LJVMCORE_O)
+	$(E) "DYNLINK   $@"
+	$(Q)$(TARGET_LD) $(TARGET_ASHLDFLAGS) -o $@ $(LJVMCORE_DYNO) $(TARGET_ALIBS)
+	$(Q)$(TARGET_STRIP) $@
+
+$(LUAJIT_T): $(TARGET_O) $(LUAJIT_O) $(TARGET_DEP)
+	$(E) "LINK      $@"
+	$(Q)$(TARGET_LD) $(TARGET_ALDFLAGS) -o $@ $(LUAJIT_O) $(TARGET_O) $(TARGET_ALIBS)
+	$(Q)$(TARGET_STRIP) $@
+	$(E) "OK        Successfully built LuaJIT"
+
+##############################################################################

+ 226 - 0
luajit.mod/luajit/src/Makefile.dep

@@ -0,0 +1,226 @@
+lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
+lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
+ lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
+ lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
+ lj_lib.h lj_libdef.h
+lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
+ lj_libdef.h
+lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
+ lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
+ lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
+lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
+ lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
+ lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
+ lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \
+ lj_libdef.h
+lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
+lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
+lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
+ lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \
+ lj_lib.h lj_libdef.h
+lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
+ lj_libdef.h
+lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
+lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h
+lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \
+ lj_asm_*.h
+lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
+ lj_bcdef.h
+lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \
+ lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
+lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \
+ lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
+ lj_cdata.h lj_carith.h
+lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
+ lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h
+lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
+ lj_bc.h lj_ctype.h lj_cconv.h lj_ccall.h lj_ccallback.h lj_target.h \
+ lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h
+lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
+ lj_ccallback.h
+lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
+ lj_cdata.h
+lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
+lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
+ lj_cdata.h lj_clib.h
+lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_char.h lj_strscan.h
+lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
+ lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
+ lj_crecord.h
+lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
+lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_jit.h lj_ir.h
+lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
+ lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
+ lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h luajit.h
+lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
+ lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
+ lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h
+lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
+ lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
+ lj_vm.h lj_strscan.h lj_recdef.h
+lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h lj_vm.h
+lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
+ lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \
+ lj_ir.h lj_dispatch.h
+lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
+ lj_vm.h lj_strscan.h lj_lib.h
+lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
+ lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
+lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
+ lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
+lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
+lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
+ lj_vm.h lj_strscan.h
+lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
+lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.h lj_jit.h lj_iropt.h
+lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+ lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
+ lj_strscan.h lj_folddef.h
+lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_strscan.h
+lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
+lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_vm.h
+lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \
+ lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
+ lj_ffrecord.h lj_snap.h lj_vm.h
+lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
+ lj_target_*.h lj_ctype.h lj_cdata.h
+lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
+ lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
+lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_char.h lj_strscan.h
+lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_tab.h
+lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \
+ lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
+ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h
+lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_udata.h
+lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
+ lj_vm.h lj_vmevent.h
+lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.h lj_vm.h
+ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
+ lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
+ lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
+ lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
+ lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
+ lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
+ luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
+ lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
+ lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
+ lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
+ lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
+ lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
+ lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
+ lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
+ lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
+ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
+ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
+ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
+ lib_init.c
+luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
+host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
+ lj_ircall.h lj_ir.h lj_jit.h lj_frame.h lj_bc.h lj_dispatch.h lj_ctype.h \
+ lj_gc.h lj_ccall.h lj_ctype.h luajit.h \
+ host/buildvm_arch.h lj_traceerr.h
+host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
+ lj_arch.h lj_bc.h lj_def.h lj_arch.h
+host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
+ luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
+host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h
+host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
+ luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
+host/minilua.o: host/minilua.c

+ 4 - 0
luajit.mod/luajit/src/host/README

@@ -0,0 +1,4 @@
+The files in this directory are only used during the build process of LuaJIT.
+For cross-compilation, they must be executed on the host, not on the target.
+
+These files should NOT be installed!

+ 516 - 0
luajit.mod/luajit/src/host/buildvm.c

@@ -0,0 +1,516 @@
+/*
+** LuaJIT VM builder.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** This is a tool to build the hand-tuned assembler code required for
+** LuaJIT's bytecode interpreter. It supports a variety of output formats
+** to feed different toolchains (see usage() below).
+**
+** This tool is not particularly optimized because it's only used while
+** _building_ LuaJIT. There's no point in distributing or installing it.
+** Only the object code generated by this tool is linked into LuaJIT.
+**
+** Caveat: some memory is not free'd, error handling is lazy.
+** It's a one-shot tool -- any effort fixing this would be wasted.
+*/
+
+#include "buildvm.h"
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_bc.h"
+#include "lj_ir.h"
+#include "lj_ircall.h"
+#include "lj_frame.h"
+#include "lj_dispatch.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_ccall.h"
+#endif
+#include "luajit.h"
+
+#if defined(_WIN32)
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+/* DynASM glue definitions. */
+#define Dst		ctx
+#define Dst_DECL	BuildCtx *ctx
+#define Dst_REF		(ctx->D)
+#define DASM_CHECKS	1
+
+#include "../dynasm/dasm_proto.h"
+
+/* Glue macros for DynASM. */
+static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
+
+#define DASM_EXTERN(ctx, addr, idx, type) \
+  collect_reloc(ctx, addr, idx, type)
+
+/* ------------------------------------------------------------------------ */
+
+/* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */
+#define DASM_ALIGNED_WRITES	1
+
+/* Embed architecture-specific DynASM encoder. */
+#if LJ_TARGET_X86ORX64
+#include "../dynasm/dasm_x86.h"
+#elif LJ_TARGET_ARM
+#include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_PPC
+#include "../dynasm/dasm_ppc.h"
+#elif LJ_TARGET_PPCSPE
+#include "../dynasm/dasm_ppc.h"
+#elif LJ_TARGET_MIPS
+#include "../dynasm/dasm_mips.h"
+#else
+#error "No support for this architecture (yet)"
+#endif
+
+/* Embed generated architecture-specific backend. */
+#include "buildvm_arch.h"
+
+/* ------------------------------------------------------------------------ */
+
+void owrite(BuildCtx *ctx, const void *ptr, size_t sz)
+{
+  if (fwrite(ptr, 1, sz, ctx->fp) != sz) {
+    fprintf(stderr, "Error: cannot write to output file: %s\n",
+	    strerror(errno));
+    exit(1);
+  }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Emit code as raw bytes. Only used for DynASM debugging. */
+static void emit_raw(BuildCtx *ctx)
+{
+  owrite(ctx, ctx->code, ctx->codesz);
+}
+
+/* -- Build machine code -------------------------------------------------- */
+
+static const char *sym_decorate(BuildCtx *ctx,
+				const char *prefix, const char *suffix)
+{
+  char name[256];
+  char *p;
+#if LJ_64
+  const char *symprefix = ctx->mode == BUILD_machasm ? "_" : "";
+#elif LJ_TARGET_XBOX360
+  const char *symprefix = "";
+#else
+  const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : "";
+#endif
+  sprintf(name, "%s%s%s", symprefix, prefix, suffix);
+  p = strchr(name, '@');
+  if (p) {
+#if LJ_TARGET_X86ORX64
+    if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
+      name[0] = '@';
+    else
+      *p = '\0';
+#elif (LJ_TARGET_PPC  || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
+    /* Keep @plt. */
+#else
+    *p = '\0';
+#endif
+  }
+  p = (char *)malloc(strlen(name)+1);  /* MSVC doesn't like strdup. */
+  strcpy(p, name);
+  return p;
+}
+
+#define NRELOCSYM	(sizeof(extnames)/sizeof(extnames[0])-1)
+
+static int relocmap[NRELOCSYM];
+
+/* Collect external relocations. */
+static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type)
+{
+  if (ctx->nreloc >= BUILD_MAX_RELOC) {
+    fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n");
+    exit(1);
+  }
+  if (relocmap[idx] < 0) {
+    relocmap[idx] = ctx->nrelocsym;
+    ctx->relocsym[ctx->nrelocsym] = sym_decorate(ctx, "", extnames[idx]);
+    ctx->nrelocsym++;
+  }
+  ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code);
+  ctx->reloc[ctx->nreloc].sym = relocmap[idx];
+  ctx->reloc[ctx->nreloc].type = type;
+  ctx->nreloc++;
+#if LJ_TARGET_XBOX360
+  return (int)(ctx->code - addr) + 4;  /* Encode symbol offset of .text. */
+#else
+  return 0;  /* Encode symbol offset of 0. */
+#endif
+}
+
+/* Naive insertion sort. Performance doesn't matter here. */
+static void sym_insert(BuildCtx *ctx, int32_t ofs,
+		       const char *prefix, const char *suffix)
+{
+  ptrdiff_t i = ctx->nsym++;
+  while (i > 0) {
+    if (ctx->sym[i-1].ofs <= ofs)
+      break;
+    ctx->sym[i] = ctx->sym[i-1];
+    i--;
+  }
+  ctx->sym[i].ofs = ofs;
+  ctx->sym[i].name = sym_decorate(ctx, prefix, suffix);
+}
+
+/* Build the machine code. */
+static int build_code(BuildCtx *ctx)
+{
+  int status;
+  int i;
+
+  /* Initialize DynASM structures. */
+  ctx->nglob = GLOB__MAX;
+  ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *));
+  memset(ctx->glob, 0, ctx->nglob*sizeof(void *));
+  ctx->nreloc = 0;
+
+  ctx->globnames = globnames;
+  ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
+  ctx->nrelocsym = 0;
+  for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
+
+  ctx->dasm_ident = DASM_IDENT;
+  ctx->dasm_arch = DASM_ARCH;
+
+  dasm_init(Dst, DASM_MAXSECTION);
+  dasm_setupglobal(Dst, ctx->glob, ctx->nglob);
+  dasm_setup(Dst, build_actionlist);
+
+  /* Call arch-specific backend to emit the code. */
+  ctx->npc = build_backend(ctx);
+
+  /* Finalize the code. */
+  (void)dasm_checkstep(Dst, -1);
+  if ((status = dasm_link(Dst, &ctx->codesz))) return status;
+  ctx->code = (uint8_t *)malloc(ctx->codesz);
+  if ((status = dasm_encode(Dst, (void *)ctx->code))) return status;
+
+  /* Allocate symbol table and bytecode offsets. */
+  ctx->beginsym = sym_decorate(ctx, "", LABEL_PREFIX "vm_asm_begin");
+  ctx->sym = (BuildSym *)malloc((ctx->npc+ctx->nglob+1)*sizeof(BuildSym));
+  ctx->nsym = 0;
+  ctx->bc_ofs = (int32_t *)malloc(ctx->npc*sizeof(int32_t));
+
+  /* Collect the opcodes (PC labels). */
+  for (i = 0; i < ctx->npc; i++) {
+    int32_t ofs = dasm_getpclabel(Dst, i);
+    if (ofs < 0) return 0x22000000|i;
+    ctx->bc_ofs[i] = ofs;
+    if ((LJ_HASJIT ||
+	 !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP ||
+	   i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) &&
+	(LJ_HASFFI || i != BC_KCDATA))
+      sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]);
+  }
+
+  /* Collect the globals (named labels). */
+  for (i = 0; i < ctx->nglob; i++) {
+    const char *gl = globnames[i];
+    int len = (int)strlen(gl);
+    if (!ctx->glob[i]) {
+      fprintf(stderr, "Error: undefined global %s\n", gl);
+      exit(2);
+    }
+    /* Skip the _Z symbols. */
+    if (!(len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z'))
+      sym_insert(ctx, (int32_t)((uint8_t *)(ctx->glob[i]) - ctx->code),
+		 LABEL_PREFIX, globnames[i]);
+  }
+
+  /* Close the address range. */
+  sym_insert(ctx, (int32_t)ctx->codesz, "", "");
+  ctx->nsym--;
+
+  dasm_free(Dst);
+
+  return 0;
+}
+
+/* -- Generate VM enums --------------------------------------------------- */
+
+const char *const bc_names[] = {
+#define BCNAME(name, ma, mb, mc, mt)       #name,
+BCDEF(BCNAME)
+#undef BCNAME
+  NULL
+};
+
+const char *const ir_names[] = {
+#define IRNAME(name, m, m1, m2)	#name,
+IRDEF(IRNAME)
+#undef IRNAME
+  NULL
+};
+
+const char *const irt_names[] = {
+#define IRTNAME(name, size)	#name,
+IRTDEF(IRTNAME)
+#undef IRTNAME
+  NULL
+};
+
+const char *const irfpm_names[] = {
+#define FPMNAME(name)		#name,
+IRFPMDEF(FPMNAME)
+#undef FPMNAME
+  NULL
+};
+
+const char *const irfield_names[] = {
+#define FLNAME(name, ofs)	#name,
+IRFLDEF(FLNAME)
+#undef FLNAME
+  NULL
+};
+
+const char *const ircall_names[] = {
+#define IRCALLNAME(cond, name, nargs, kind, type, flags)	#name,
+IRCALLDEF(IRCALLNAME)
+#undef IRCALLNAME
+  NULL
+};
+
+static const char *const trace_errors[] = {
+#define TREDEF(name, msg)	msg,
+#include "lj_traceerr.h"
+  NULL
+};
+
+static const char *lower(char *buf, const char *s)
+{
+  char *p = buf;
+  while (*s) {
+    *p++ = (*s >= 'A' && *s <= 'Z') ? *s+0x20 : *s;
+    s++;
+  }
+  *p = '\0';
+  return buf;
+}
+
+/* Emit C source code for bytecode-related definitions. */
+static void emit_bcdef(BuildCtx *ctx)
+{
+  int i;
+  fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
+  fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n");
+  for (i = 0; i < ctx->npc; i++) {
+    if (i != 0)
+      fprintf(ctx->fp, ",\n");
+    fprintf(ctx->fp, "%d", ctx->bc_ofs[i]);
+  }
+}
+
+/* Emit VM definitions as Lua code for debug modules. */
+static void emit_vmdef(BuildCtx *ctx)
+{
+  char buf[80];
+  int i;
+  fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
+  fprintf(ctx->fp, "module(...)\n\n");
+
+  fprintf(ctx->fp, "bcnames = \"");
+  for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
+  fprintf(ctx->fp, "\"\n\n");
+
+  fprintf(ctx->fp, "irnames = \"");
+  for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
+  fprintf(ctx->fp, "\"\n\n");
+
+  fprintf(ctx->fp, "irfpm = { [0]=");
+  for (i = 0; irfpm_names[i]; i++)
+    fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
+  fprintf(ctx->fp, "}\n\n");
+
+  fprintf(ctx->fp, "irfield = { [0]=");
+  for (i = 0; irfield_names[i]; i++) {
+    char *p;
+    lower(buf, irfield_names[i]);
+    p = strchr(buf, '_');
+    if (p) *p = '.';
+    fprintf(ctx->fp, "\"%s\", ", buf);
+  }
+  fprintf(ctx->fp, "}\n\n");
+
+  fprintf(ctx->fp, "ircall = {\n[0]=");
+  for (i = 0; ircall_names[i]; i++)
+    fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
+  fprintf(ctx->fp, "}\n\n");
+
+  fprintf(ctx->fp, "traceerr = {\n[0]=");
+  for (i = 0; trace_errors[i]; i++)
+    fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
+  fprintf(ctx->fp, "}\n\n");
+}
+
+/* -- Argument parsing ---------------------------------------------------- */
+
+/* Build mode names. */
+static const char *const modenames[] = {
+#define BUILDNAME(name)		#name,
+BUILDDEF(BUILDNAME)
+#undef BUILDNAME
+  NULL
+};
+
+/* Print usage information and exit. */
+static void usage(void)
+{
+  int i;
+  fprintf(stderr, LUAJIT_VERSION " VM builder.\n");
+  fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n");
+  fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n");
+  fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n");
+  fprintf(stderr, "Available modes:\n");
+  for (i = 0; i < BUILD__MAX; i++)
+    fprintf(stderr, "  %s\n", modenames[i]);
+  exit(1);
+}
+
+/* Parse the output mode name. */
+static BuildMode parsemode(const char *mode)
+{
+  int i;
+  for (i = 0; modenames[i]; i++)
+    if (!strcmp(mode, modenames[i]))
+      return (BuildMode)i;
+  usage();
+  return (BuildMode)-1;
+}
+
+/* Parse arguments. */
+static void parseargs(BuildCtx *ctx, char **argv)
+{
+  const char *a;
+  int i;
+  ctx->mode = (BuildMode)-1;
+  ctx->outname = "-";
+  for (i = 1; (a = argv[i]) != NULL; i++) {
+    if (a[0] != '-')
+      break;
+    switch (a[1]) {
+    case '-':
+      if (a[2]) goto err;
+      i++;
+      goto ok;
+    case '\0':
+      goto ok;
+    case 'm':
+      i++;
+      if (a[2] || argv[i] == NULL) goto err;
+      ctx->mode = parsemode(argv[i]);
+      break;
+    case 'o':
+      i++;
+      if (a[2] || argv[i] == NULL) goto err;
+      ctx->outname = argv[i];
+      break;
+    default: err:
+      usage();
+      break;
+    }
+  }
+ok:
+  ctx->args = argv+i;
+  if (ctx->mode == (BuildMode)-1) goto err;
+}
+
+int main(int argc, char **argv)
+{
+  BuildCtx ctx_;
+  BuildCtx *ctx = &ctx_;
+  int status, binmode;
+
+  if (sizeof(void *) != 4*LJ_32+8*LJ_64) {
+    fprintf(stderr,"Error: pointer size mismatch in cross-build.\n");
+    fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n");
+    return 1;
+  }
+
+  UNUSED(argc);
+  parseargs(ctx, argv);
+
+  if ((status = build_code(ctx))) {
+    fprintf(stderr,"Error: DASM error %08x\n", status);
+    return 1;
+  }
+
+  switch (ctx->mode) {
+  case BUILD_peobj:
+  case BUILD_raw:
+    binmode = 1;
+    break;
+  default:
+    binmode = 0;
+    break;
+  }
+
+  if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') {
+    ctx->fp = stdout;
+#if defined(_WIN32)
+    if (binmode)
+      _setmode(_fileno(stdout), _O_BINARY);  /* Yuck. */
+#endif
+  } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) {
+    fprintf(stderr, "Error: cannot open output file '%s': %s\n",
+	    ctx->outname, strerror(errno));
+    exit(1);
+  }
+
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+  case BUILD_coffasm:
+  case BUILD_machasm:
+    emit_asm(ctx);
+    emit_asm_debug(ctx);
+    break;
+  case BUILD_peobj:
+    emit_peobj(ctx);
+    break;
+  case BUILD_raw:
+    emit_raw(ctx);
+    break;
+  case BUILD_bcdef:
+    emit_bcdef(ctx);
+    emit_lib(ctx);
+    break;
+  case BUILD_vmdef:
+    emit_vmdef(ctx);
+    emit_lib(ctx);
+    break;
+  case BUILD_ffdef:
+  case BUILD_libdef:
+  case BUILD_recdef:
+    emit_lib(ctx);
+    break;
+  case BUILD_folddef:
+    emit_fold(ctx);
+    break;
+  default:
+    break;
+  }
+
+  fflush(ctx->fp);
+  if (ferror(ctx->fp)) {
+    fprintf(stderr, "Error: cannot write to output file: %s\n",
+	    strerror(errno));
+    exit(1);
+  }
+  fclose(ctx->fp);
+
+  return 0;
+}
+

+ 104 - 0
luajit.mod/luajit/src/host/buildvm.h

@@ -0,0 +1,104 @@
+/*
+** LuaJIT VM builder.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _BUILDVM_H
+#define _BUILDVM_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* Hardcoded limits. Increase as needed. */
+#define BUILD_MAX_RELOC		200	/* Max. number of relocations. */
+#define BUILD_MAX_FOLD		4096	/* Max. number of fold rules. */
+
+/* Prefix for scanned library definitions. */
+#define LIBDEF_PREFIX		"LJLIB_"
+
+/* Prefix for scanned fold definitions. */
+#define FOLDDEF_PREFIX		"LJFOLD"
+
+/* Prefixes for generated labels. */
+#define LABEL_PREFIX		"lj_"
+#define LABEL_PREFIX_BC		LABEL_PREFIX "BC_"
+#define LABEL_PREFIX_FF		LABEL_PREFIX "ff_"
+#define LABEL_PREFIX_CF		LABEL_PREFIX "cf_"
+#define LABEL_PREFIX_FFH	LABEL_PREFIX "ffh_"
+#define LABEL_PREFIX_LIBCF	LABEL_PREFIX "lib_cf_"
+#define LABEL_PREFIX_LIBINIT	LABEL_PREFIX "lib_init_"
+
+/* Forward declaration. */
+struct dasm_State;
+
+/* Build modes. */
+#define BUILDDEF(_) \
+  _(elfasm) _(coffasm) _(machasm) _(peobj) _(raw) \
+  _(bcdef) _(ffdef) _(libdef) _(recdef) _(vmdef) \
+  _(folddef)
+
+typedef enum {
+#define BUILDENUM(name)		BUILD_##name,
+BUILDDEF(BUILDENUM)
+#undef BUILDENUM
+  BUILD__MAX
+} BuildMode;
+
+/* Code relocation. */
+typedef struct BuildReloc {
+  int32_t ofs;
+  int sym;
+  int type;
+} BuildReloc;
+
+typedef struct BuildSym {
+  const char *name;
+  int32_t ofs;
+} BuildSym;
+
+/* Build context structure. */
+typedef struct BuildCtx {
+  /* DynASM state pointer. Should be first member. */
+  struct dasm_State *D;
+  /* Parsed command line. */
+  BuildMode mode;
+  FILE *fp;
+  const char *outname;
+  char **args;
+  /* Code and symbols generated by DynASM. */
+  uint8_t *code;
+  size_t codesz;
+  int npc, nglob, nsym, nreloc, nrelocsym;
+  void **glob;
+  BuildSym *sym;
+  const char **relocsym;
+  int32_t *bc_ofs;
+  const char *beginsym;
+  /* Strings generated by DynASM. */
+  const char *const *globnames;
+  const char *dasm_ident;
+  const char *dasm_arch;
+  /* Relocations. */
+  BuildReloc reloc[BUILD_MAX_RELOC];
+} BuildCtx;
+
+extern void owrite(BuildCtx *ctx, const void *ptr, size_t sz);
+extern void emit_asm(BuildCtx *ctx);
+extern void emit_peobj(BuildCtx *ctx);
+extern void emit_lib(BuildCtx *ctx);
+extern void emit_fold(BuildCtx *ctx);
+
+extern const char *const bc_names[];
+extern const char *const ir_names[];
+extern const char *const irt_names[];
+extern const char *const irfpm_names[];
+extern const char *const irfield_names[];
+extern const char *const ircall_names[];
+
+#endif

+ 313 - 0
luajit.mod/luajit/src/host/buildvm_asm.c

@@ -0,0 +1,313 @@
+/*
+** LuaJIT VM builder: Assembler source code emitter.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "buildvm.h"
+#include "lj_bc.h"
+
+/* ------------------------------------------------------------------------ */
+
+#if LJ_TARGET_X86ORX64
+/* Emit bytes piecewise as assembler text. */
+static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
+{
+  int i;
+  for (i = 0; i < n; i++) {
+    if ((i & 15) == 0)
+      fprintf(ctx->fp, "\t.byte %d", p[i]);
+    else
+      fprintf(ctx->fp, ",%d", p[i]);
+    if ((i & 15) == 15) putc('\n', ctx->fp);
+  }
+  if ((n & 15) != 0) putc('\n', ctx->fp);
+}
+
+/* Emit relocation */
+static void emit_asm_reloc(BuildCtx *ctx, int type, const char *sym)
+{
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+    if (type)
+      fprintf(ctx->fp, "\t.long %s-.-4\n", sym);
+    else
+      fprintf(ctx->fp, "\t.long %s\n", sym);
+    break;
+  case BUILD_coffasm:
+    fprintf(ctx->fp, "\t.def %s; .scl 3; .type 32; .endef\n", sym);
+    if (type)
+      fprintf(ctx->fp, "\t.long %s-.-4\n", sym);
+    else
+      fprintf(ctx->fp, "\t.long %s\n", sym);
+    break;
+  default:  /* BUILD_machasm for relative relocations handled below. */
+    fprintf(ctx->fp, "\t.long %s\n", sym);
+    break;
+  }
+}
+
+static const char *const jccnames[] = {
+  "jo", "jno", "jb", "jnb", "jz", "jnz", "jbe", "ja",
+  "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
+};
+
+/* Emit relocation for the incredibly stupid OSX assembler. */
+static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+				const char *sym)
+{
+  const char *opname = NULL;
+  if (--n < 0) goto err;
+  if (cp[n] == 0xe8) {
+    opname = "call";
+  } else if (cp[n] == 0xe9) {
+    opname = "jmp";
+  } else if (cp[n] >= 0x80 && cp[n] <= 0x8f && n > 0 && cp[n-1] == 0x0f) {
+    opname = jccnames[cp[n]-0x80];
+    n--;
+  } else {
+err:
+    fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
+	    sym);
+    exit(1);
+  }
+  emit_asm_bytes(ctx, cp, n);
+  fprintf(ctx->fp, "\t%s %s\n", opname, sym);
+}
+#else
+/* Emit words piecewise as assembler text. */
+static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
+{
+  int i;
+  for (i = 0; i < n; i += 4) {
+    if ((i & 15) == 0)
+      fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i));
+    else
+      fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i));
+    if ((i & 15) == 12) putc('\n', ctx->fp);
+  }
+  if ((n & 15) != 0) putc('\n', ctx->fp);
+}
+
+/* Emit relocation as part of an instruction. */
+static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
+			       const char *sym)
+{
+  uint32_t ins;
+  emit_asm_words(ctx, p, n-4);
+  ins = *(uint32_t *)(p+n-4);
+#if LJ_TARGET_ARM
+  if ((ins & 0xff000000u) == 0xfa000000u) {
+    fprintf(ctx->fp, "\tblx %s\n", sym);
+  } else if ((ins & 0x0e000000u) == 0x0a000000u) {
+    fprintf(ctx->fp, "\t%s%.2s %s\n", (ins & 0x01000000u) ? "bl" : "b",
+	    &"eqnecsccmiplvsvchilsgeltgtle"[2*(ins >> 28)], sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#if LJ_TARGET_PS3
+#define TOCPREFIX "."
+#else
+#define TOCPREFIX ""
+#endif
+  if ((ins >> 26) == 16) {
+    fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
+	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+  } else if ((ins >> 26) == 18) {
+    fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_MIPS
+  fprintf(stderr,
+	  "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	  ins, sym);
+  exit(1);
+#else
+#error "missing relocation support for this architecture"
+#endif
+}
+#endif
+
+#if LJ_TARGET_ARM
+#define ELFASM_PX	"%%"
+#else
+#define ELFASM_PX	"@"
+#endif
+
+/* Emit an assembler label. */
+static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc)
+{
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+#if LJ_TARGET_PS3
+    if (!strncmp(name, "lj_vm_", 6) &&
+	strcmp(name, ctx->beginsym) &&
+	!strstr(name, "hook")) {
+      fprintf(ctx->fp,
+	"\n\t.globl %s\n"
+	"\t.section \".opd\",\"aw\"\n"
+	"%s:\n"
+	"\t.long .%s,.TOC.@tocbase32\n"
+	"\t.size %s,8\n"
+	"\t.previous\n"
+	"\t.globl .%s\n"
+	"\t.hidden .%s\n"
+	"\t.type .%s, " ELFASM_PX "function\n"
+	"\t.size .%s, %d\n"
+	".%s:\n",
+	name, name, name, name, name, name, name, name, size, name);
+      break;
+    }
+#endif
+    fprintf(ctx->fp,
+      "\n\t.globl %s\n"
+      "\t.hidden %s\n"
+      "\t.type %s, " ELFASM_PX "%s\n"
+      "\t.size %s, %d\n"
+      "%s:\n",
+      name, name, name, isfunc ? "function" : "object", name, size, name);
+    break;
+  case BUILD_coffasm:
+    fprintf(ctx->fp, "\n\t.globl %s\n", name);
+    if (isfunc)
+      fprintf(ctx->fp, "\t.def %s; .scl 3; .type 32; .endef\n", name);
+    fprintf(ctx->fp, "%s:\n", name);
+    break;
+  case BUILD_machasm:
+    fprintf(ctx->fp,
+      "\n\t.private_extern %s\n"
+      "%s:\n", name, name);
+    break;
+  default:
+    break;
+  }
+}
+
+/* Emit alignment. */
+static void emit_asm_align(BuildCtx *ctx, int bits)
+{
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+  case BUILD_coffasm:
+    fprintf(ctx->fp, "\t.p2align %d\n", bits);
+    break;
+  case BUILD_machasm:
+    fprintf(ctx->fp, "\t.align %d\n", bits);
+    break;
+  default:
+    break;
+  }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Emit assembler source code. */
+void emit_asm(BuildCtx *ctx)
+{
+  int i, rel;
+
+  fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+  fprintf(ctx->fp, "\t.text\n");
+  emit_asm_align(ctx, 4);
+
+#if LJ_TARGET_PS3
+  emit_asm_label(ctx, ctx->beginsym, ctx->codesz, 0);
+#else
+  emit_asm_label(ctx, ctx->beginsym, 0, 0);
+#endif
+  if (ctx->mode != BUILD_machasm)
+    fprintf(ctx->fp, ".Lbegin:\n");
+
+#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
+  /* This should really be moved into buildvm_arm.dasc. */
+  fprintf(ctx->fp,
+	  ".fnstart\n"
+	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
+	  ".pad #28\n");
+#endif
+#if LJ_TARGET_MIPS
+  fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
+#endif
+
+  for (i = rel = 0; i < ctx->nsym; i++) {
+    int32_t ofs = ctx->sym[i].ofs;
+    int32_t next = ctx->sym[i+1].ofs;
+#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND && LJ_HASFFI
+    if (!strcmp(ctx->sym[i].name, "lj_vm_ffi_call"))
+      fprintf(ctx->fp,
+	      ".globl lj_err_unwind_arm\n"
+	      ".personality lj_err_unwind_arm\n"
+	      ".fnend\n"
+	      ".fnstart\n"
+	      ".save {r4, r5, r11, lr}\n"
+	      ".setfp r11, sp\n");
+#endif
+    emit_asm_label(ctx, ctx->sym[i].name, next - ofs, 1);
+    while (rel < ctx->nreloc && ctx->reloc[rel].ofs <= next) {
+      BuildReloc *r = &ctx->reloc[rel];
+      int n = r->ofs - ofs;
+#if LJ_TARGET_X86ORX64
+      if (ctx->mode == BUILD_machasm && r->type != 0) {
+	emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      } else {
+	emit_asm_bytes(ctx, ctx->code+ofs, n);
+	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
+      }
+      ofs += n+4;
+#else
+      emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      ofs += n;
+#endif
+      rel++;
+    }
+#if LJ_TARGET_X86ORX64
+    emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
+#else
+    emit_asm_words(ctx, ctx->code+ofs, next-ofs);
+#endif
+  }
+
+#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
+  fprintf(ctx->fp,
+#if !LJ_HASFFI
+	  ".globl lj_err_unwind_arm\n"
+	  ".personality lj_err_unwind_arm\n"
+#endif
+	  ".fnend\n");
+#endif
+
+  fprintf(ctx->fp, "\n");
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
+    fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
+#endif
+#if LJ_TARGET_PPCSPE
+    /* Soft-float ABI + SPE. */
+    fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
+#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+    /* Hard-float ABI. */
+    fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
+#endif
+    /* fallthrough */
+  case BUILD_coffasm:
+    fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
+    break;
+  case BUILD_machasm:
+    fprintf(ctx->fp,
+      "\t.cstring\n"
+      "\t.ascii \"%s\\0\"\n", ctx->dasm_ident);
+    break;
+  default:
+    break;
+  }
+  fprintf(ctx->fp, "\n");
+}
+

+ 229 - 0
luajit.mod/luajit/src/host/buildvm_fold.c

@@ -0,0 +1,229 @@
+/*
+** LuaJIT VM builder: IR folding hash table generator.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "buildvm.h"
+#include "lj_obj.h"
+#include "lj_ir.h"
+
+/* Context for the folding hash table generator. */
+static int lineno;
+static int funcidx;
+static uint32_t foldkeys[BUILD_MAX_FOLD];
+static uint32_t nkeys;
+
+/* Try to fill the hash table with keys using the hash parameters. */
+static int tryhash(uint32_t *htab, uint32_t sz, uint32_t r, int dorol)
+{
+  uint32_t i;
+  if (dorol && ((r & 31) == 0 || (r>>5) == 0))
+    return 0;  /* Avoid zero rotates. */
+  memset(htab, 0xff, (sz+1)*sizeof(uint32_t));
+  for (i = 0; i < nkeys; i++) {
+    uint32_t key = foldkeys[i];
+    uint32_t k = key & 0xffffff;
+    uint32_t h = (dorol ? lj_rol(lj_rol(k, r>>5) - k, r&31) :
+			  (((k << (r>>5)) - k) << (r&31))) % sz;
+    if (htab[h] != 0xffffffff) {  /* Collision on primary slot. */
+      if (htab[h+1] != 0xffffffff) {  /* Collision on secondary slot. */
+	/* Try to move the colliding key, if possible. */
+	if (h < sz-1 && htab[h+2] == 0xffffffff) {
+	  uint32_t k2 = htab[h+1] & 0xffffff;
+	  uint32_t h2 = (dorol ? lj_rol(lj_rol(k2, r>>5) - k2, r&31) :
+				 (((k2 << (r>>5)) - k2) << (r&31))) % sz;
+	  if (h2 != h+1) return 0;  /* Cannot resolve collision. */
+	  htab[h+2] = htab[h+1];  /* Move colliding key to secondary slot. */
+	} else {
+	  return 0;  /* Collision. */
+	}
+      }
+      htab[h+1] = key;
+    } else {
+      htab[h] = key;
+    }
+  }
+  return 1;  /* Success, all keys could be stored. */
+}
+
+/* Print the generated hash table. */
+static void printhash(BuildCtx *ctx, uint32_t *htab, uint32_t sz)
+{
+  uint32_t i;
+  fprintf(ctx->fp, "static const uint32_t fold_hash[%d] = {\n0x%08x",
+	  sz+1, htab[0]);
+  for (i = 1; i < sz+1; i++)
+    fprintf(ctx->fp, ",\n0x%08x", htab[i]);
+  fprintf(ctx->fp, "\n};\n\n");
+}
+
+/* Exhaustive search for the shortest semi-perfect hash table. */
+static void makehash(BuildCtx *ctx)
+{
+  uint32_t htab[BUILD_MAX_FOLD*2+1];
+  uint32_t sz, r;
+  /* Search for the smallest hash table with an odd size. */
+  for (sz = (nkeys|1); sz < BUILD_MAX_FOLD*2; sz += 2) {
+    /* First try all shift hash combinations. */
+    for (r = 0; r < 32*32; r++) {
+      if (tryhash(htab, sz, r, 0)) {
+	printhash(ctx, htab, sz);
+	fprintf(ctx->fp,
+		"#define fold_hashkey(k)\t(((((k)<<%u)-(k))<<%u)%%%u)\n\n",
+		r>>5, r&31, sz);
+	return;
+      }
+    }
+    /* Then try all rotate hash combinations. */
+    for (r = 0; r < 32*32; r++) {
+      if (tryhash(htab, sz, r, 1)) {
+	printhash(ctx, htab, sz);
+	fprintf(ctx->fp,
+	  "#define fold_hashkey(k)\t(lj_rol(lj_rol((k),%u)-(k),%u)%%%u)\n\n",
+		r>>5, r&31, sz);
+	return;
+      }
+    }
+  }
+  fprintf(stderr, "Error: search for perfect hash failed\n");
+  exit(1);
+}
+
+/* Parse one token of a fold rule. */
+static uint32_t nexttoken(char **pp, int allowlit, int allowany)
+{
+  char *p = *pp;
+  if (p) {
+    uint32_t i;
+    char *q = strchr(p, ' ');
+    if (q) *q++ = '\0';
+    *pp = q;
+    if (allowlit && !strncmp(p, "IRFPM_", 6)) {
+      for (i = 0; irfpm_names[i]; i++)
+	if (!strcmp(irfpm_names[i], p+6))
+	  return i;
+    } else if (allowlit && !strncmp(p, "IRFL_", 5)) {
+      for (i = 0; irfield_names[i]; i++)
+	if (!strcmp(irfield_names[i], p+5))
+	  return i;
+    } else if (allowlit && !strncmp(p, "IRCALL_", 7)) {
+      for (i = 0; ircall_names[i]; i++)
+	if (!strcmp(ircall_names[i], p+7))
+	  return i;
+    } else if (allowlit && !strncmp(p, "IRCONV_", 7)) {
+      for (i = 0; irt_names[i]; i++) {
+	const char *r = strchr(p+7, '_');
+	if (r && !strncmp(irt_names[i], p+7, r-(p+7))) {
+	  uint32_t j;
+	  for (j = 0; irt_names[j]; j++)
+	    if (!strcmp(irt_names[j], r+1))
+	      return (i << 5) + j;
+	}
+      }
+    } else if (allowlit && *p >= '0' && *p <= '9') {
+      for (i = 0; *p >= '0' && *p <= '9'; p++)
+	i = i*10 + (*p - '0');
+      if (*p == '\0')
+	return i;
+    } else if (allowany && !strcmp("any", p)) {
+      return allowany;
+    } else {
+      for (i = 0; ir_names[i]; i++)
+	if (!strcmp(ir_names[i], p))
+	  return i;
+    }
+    fprintf(stderr, "Error: bad fold definition token \"%s\" at line %d\n", p, lineno);
+    exit(1);
+  }
+  return 0;
+}
+
+/* Parse a fold rule. */
+static void foldrule(char *p)
+{
+  uint32_t op = nexttoken(&p, 0, 0);
+  uint32_t left = nexttoken(&p, 0, 0x7f);
+  uint32_t right = nexttoken(&p, 1, 0x3ff);
+  uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right;
+  uint32_t i;
+  if (nkeys >= BUILD_MAX_FOLD) {
+    fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");
+    exit(1);
+  }
+  /* Simple insertion sort to detect duplicates. */
+  for (i = nkeys; i > 0; i--) {
+    if ((foldkeys[i-1]&0xffffff) < (key & 0xffffff))
+      break;
+    if ((foldkeys[i-1]&0xffffff) == (key & 0xffffff)) {
+      fprintf(stderr, "Error: duplicate fold definition at line %d\n", lineno);
+      exit(1);
+    }
+    foldkeys[i] = foldkeys[i-1];
+  }
+  foldkeys[i] = key;
+  nkeys++;
+}
+
+/* Emit C source code for IR folding hash table. */
+void emit_fold(BuildCtx *ctx)
+{
+  char buf[256];  /* We don't care about analyzing lines longer than that. */
+  const char *fname = ctx->args[0];
+  FILE *fp;
+
+  if (fname == NULL) {
+    fprintf(stderr, "Error: missing input filename\n");
+    exit(1);
+  }
+
+  if (fname[0] == '-' && fname[1] == '\0') {
+    fp = stdin;
+  } else {
+    fp = fopen(fname, "r");
+    if (!fp) {
+      fprintf(stderr, "Error: cannot open input file '%s': %s\n",
+	      fname, strerror(errno));
+      exit(1);
+    }
+  }
+
+  fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
+  fprintf(ctx->fp, "static const FoldFunc fold_func[] = {\n");
+
+  lineno = 0;
+  funcidx = 0;
+  nkeys = 0;
+  while (fgets(buf, sizeof(buf), fp) != NULL) {
+    lineno++;
+    /* The prefix must be at the start of a line, otherwise it's ignored. */
+    if (!strncmp(buf, FOLDDEF_PREFIX, sizeof(FOLDDEF_PREFIX)-1)) {
+      char *p = buf+sizeof(FOLDDEF_PREFIX)-1;
+      char *q = strchr(p, ')');
+      if (p[0] == '(' && q) {
+	p++;
+	*q = '\0';
+	foldrule(p);
+      } else if ((p[0] == 'F' || p[0] == 'X') && p[1] == '(' && q) {
+	p += 2;
+	*q = '\0';
+	if (funcidx)
+	  fprintf(ctx->fp, ",\n");
+	if (p[-2] == 'X')
+	  fprintf(ctx->fp, "  %s", p);
+	else
+	  fprintf(ctx->fp, "  fold_%s", p);
+	funcidx++;
+      } else {
+	buf[strlen(buf)-1] = '\0';
+	fprintf(stderr, "Error: unknown fold definition tag %s%s at line %d\n",
+		FOLDDEF_PREFIX, p, lineno);
+	exit(1);
+      }
+    }
+  }
+  fclose(fp);
+  fprintf(ctx->fp, "\n};\n\n");
+
+  makehash(ctx);
+}
+

+ 398 - 0
luajit.mod/luajit/src/host/buildvm_lib.c

@@ -0,0 +1,398 @@
+/*
+** LuaJIT VM builder: library definition compiler.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "buildvm.h"
+#include "lj_obj.h"
+#include "lj_lib.h"
+
+/* Context for library definitions. */
+static uint8_t obuf[8192];
+static uint8_t *optr;
+static char modname[80];
+static size_t modnamelen;
+static char funcname[80];
+static int modstate, regfunc;
+static int ffid, recffid, ffasmfunc;
+
+enum {
+  REGFUNC_OK,
+  REGFUNC_NOREG,
+  REGFUNC_NOREGUV
+};
+
+static void libdef_name(const char *p, int kind)
+{
+  size_t n = strlen(p);
+  if (kind != LIBINIT_STRING) {
+    if (n > modnamelen && p[modnamelen] == '_' &&
+	!strncmp(p, modname, modnamelen)) {
+      p += modnamelen+1;
+      n -= modnamelen+1;
+    }
+  }
+  if (n > LIBINIT_MAXSTR) {
+    fprintf(stderr, "Error: string too long: '%s'\n",  p);
+    exit(1);
+  }
+  if (optr+1+n+2 > obuf+sizeof(obuf)) {  /* +2 for caller. */
+    fprintf(stderr, "Error: output buffer overflow\n");
+    exit(1);
+  }
+  *optr++ = (uint8_t)(n | kind);
+  memcpy(optr, p, n);
+  optr += n;
+}
+
+static void libdef_endmodule(BuildCtx *ctx)
+{
+  if (modstate != 0) {
+    char line[80];
+    const uint8_t *p;
+    int n;
+    if (modstate == 1)
+      fprintf(ctx->fp, "  (lua_CFunction)0");
+    fprintf(ctx->fp, "\n};\n");
+    fprintf(ctx->fp, "static const uint8_t %s%s[] = {\n",
+	    LABEL_PREFIX_LIBINIT, modname);
+    line[0] = '\0';
+    for (n = 0, p = obuf; p < optr; p++) {
+      n += sprintf(line+n, "%d,", *p);
+      if (n >= 75) {
+	fprintf(ctx->fp, "%s\n", line);
+	n = 0;
+	line[0] = '\0';
+      }
+    }
+    fprintf(ctx->fp, "%s%d\n};\n#endif\n\n", line, LIBINIT_END);
+  }
+}
+
+static void libdef_module(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    libdef_endmodule(ctx);
+    optr = obuf;
+    *optr++ = (uint8_t)ffid;
+    *optr++ = (uint8_t)ffasmfunc;
+    *optr++ = 0;  /* Hash table size. */
+    modstate = 1;
+    fprintf(ctx->fp, "#ifdef %sMODULE_%s\n", LIBDEF_PREFIX, p);
+    fprintf(ctx->fp, "#undef %sMODULE_%s\n", LIBDEF_PREFIX, p);
+    fprintf(ctx->fp, "static const lua_CFunction %s%s[] = {\n",
+	    LABEL_PREFIX_LIBCF, p);
+  }
+  modnamelen = strlen(p);
+  if (modnamelen > sizeof(modname)-1) {
+    fprintf(stderr, "Error: module name too long: '%s'\n", p);
+    exit(1);
+  }
+  strcpy(modname, p);
+}
+
+static int find_ffofs(BuildCtx *ctx, const char *name)
+{
+  int i;
+  for (i = 0; i < ctx->nglob; i++) {
+    const char *gl = ctx->globnames[i];
+    if (gl[0] == 'f' && gl[1] == 'f' && gl[2] == '_' && !strcmp(gl+3, name)) {
+      return (int)((uint8_t *)ctx->glob[i] - ctx->code);
+    }
+  }
+  fprintf(stderr, "Error: undefined fast function %s%s\n",
+	  LABEL_PREFIX_FF, name);
+  exit(1);
+}
+
+static void libdef_func(BuildCtx *ctx, char *p, int arg)
+{
+  if (arg != LIBINIT_CF)
+    ffasmfunc++;
+  if (ctx->mode == BUILD_libdef) {
+    if (modstate == 0) {
+      fprintf(stderr, "Error: no module for function definition %s\n", p);
+      exit(1);
+    }
+    if (regfunc == REGFUNC_NOREG) {
+      if (optr+1 > obuf+sizeof(obuf)) {
+	fprintf(stderr, "Error: output buffer overflow\n");
+	exit(1);
+      }
+      *optr++ = LIBINIT_FFID;
+    } else {
+      if (arg != LIBINIT_ASM_) {
+	if (modstate != 1) fprintf(ctx->fp, ",\n");
+	modstate = 2;
+	fprintf(ctx->fp, "  %s%s", arg ? LABEL_PREFIX_FFH : LABEL_PREFIX_CF, p);
+      }
+      if (regfunc != REGFUNC_NOREGUV) obuf[2]++;  /* Bump hash table size. */
+      libdef_name(regfunc == REGFUNC_NOREGUV ? "" : p, arg);
+    }
+  } else if (ctx->mode == BUILD_ffdef) {
+    fprintf(ctx->fp, "FFDEF(%s)\n", p);
+  } else if (ctx->mode == BUILD_recdef) {
+    if (strlen(p) > sizeof(funcname)-1) {
+      fprintf(stderr, "Error: function name too long: '%s'\n", p);
+      exit(1);
+    }
+    strcpy(funcname, p);
+  } else if (ctx->mode == BUILD_vmdef) {
+    int i;
+    for (i = 1; p[i] && modname[i-1]; i++)
+      if (p[i] == '_') p[i] = '.';
+    fprintf(ctx->fp, "\"%s\",\n", p);
+  } else if (ctx->mode == BUILD_bcdef) {
+    if (arg != LIBINIT_CF)
+      fprintf(ctx->fp, ",\n%d", find_ffofs(ctx, p));
+  }
+  ffid++;
+  regfunc = REGFUNC_OK;
+}
+
+static uint32_t find_rec(char *name)
+{
+  char *p = (char *)obuf;
+  uint32_t n;
+  for (n = 2; *p; n++) {
+    if (strcmp(p, name) == 0)
+      return n;
+    p += strlen(p)+1;
+  }
+  if (p+strlen(name)+1 >= (char *)obuf+sizeof(obuf)) {
+    fprintf(stderr, "Error: output buffer overflow\n");
+    exit(1);
+  }
+  strcpy(p, name);
+  return n;
+}
+
+static void libdef_rec(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_recdef) {
+    char *q;
+    uint32_t n;
+    for (; recffid+1 < ffid; recffid++)
+      fprintf(ctx->fp, ",\n0");
+    recffid = ffid;
+    if (*p == '.') p = funcname;
+    q = strchr(p, ' ');
+    if (q) *q++ = '\0';
+    n = find_rec(p);
+    if (q)
+      fprintf(ctx->fp, ",\n0x%02x00+(%s)", n, q);
+    else
+      fprintf(ctx->fp, ",\n0x%02x00", n);
+  }
+}
+
+static void memcpy_endian(void *dst, void *src, size_t n)
+{
+  union { uint8_t b; uint32_t u; } host_endian;
+  host_endian.u = 1;
+  if (host_endian.b == LJ_ENDIAN_SELECT(1, 0)) {
+    memcpy(dst, src, n);
+  } else {
+    size_t i;
+    for (i = 0; i < n; i++)
+      ((uint8_t *)dst)[i] = ((uint8_t *)src)[n-i-1];
+  }
+}
+
+static void libdef_push(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    int len = (int)strlen(p);
+    if (*p == '"') {
+      if (len > 1 && p[len-1] == '"') {
+	p[len-1] = '\0';
+	libdef_name(p+1, LIBINIT_STRING);
+	return;
+      }
+    } else if (*p >= '0' && *p <= '9') {
+      char *ep;
+      double d = strtod(p, &ep);
+      if (*ep == '\0') {
+	if (optr+1+sizeof(double) > obuf+sizeof(obuf)) {
+	  fprintf(stderr, "Error: output buffer overflow\n");
+	  exit(1);
+	}
+	*optr++ = LIBINIT_NUMBER;
+	memcpy_endian(optr, &d, sizeof(double));
+	optr += sizeof(double);
+	return;
+      }
+    } else if (!strcmp(p, "lastcl")) {
+      if (optr+1 > obuf+sizeof(obuf)) {
+	fprintf(stderr, "Error: output buffer overflow\n");
+	exit(1);
+      }
+      *optr++ = LIBINIT_LASTCL;
+      return;
+    } else if (len > 4 && !strncmp(p, "top-", 4)) {
+      if (optr+2 > obuf+sizeof(obuf)) {
+	fprintf(stderr, "Error: output buffer overflow\n");
+	exit(1);
+      }
+      *optr++ = LIBINIT_COPY;
+      *optr++ = (uint8_t)atoi(p+4);
+      return;
+    }
+    fprintf(stderr, "Error: bad value for %sPUSH(%s)\n", LIBDEF_PREFIX, p);
+    exit(1);
+  }
+}
+
+static void libdef_set(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    if (p[0] == '!' && p[1] == '\0') p[0] = '\0';  /* Set env. */
+    libdef_name(p, LIBINIT_STRING);
+    *optr++ = LIBINIT_SET;
+    obuf[2]++;  /* Bump hash table size. */
+  }
+}
+
+static void libdef_regfunc(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(ctx); UNUSED(p);
+  regfunc = arg;
+}
+
+typedef void (*LibDefFunc)(BuildCtx *ctx, char *p, int arg);
+
+typedef struct LibDefHandler {
+  const char *suffix;
+  const char *stop;
+  const LibDefFunc func;
+  const int arg;
+} LibDefHandler;
+
+static const LibDefHandler libdef_handlers[] = {
+  { "MODULE_",	" \t\r\n",	libdef_module,		0 },
+  { "CF(",	")",		libdef_func,		LIBINIT_CF },
+  { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
+  { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
+  { "REC(",	")",		libdef_rec,		0 },
+  { "PUSH(",	")",		libdef_push,		0 },
+  { "SET(",	")",		libdef_set,		0 },
+  { "NOREGUV",	NULL,		libdef_regfunc,		REGFUNC_NOREGUV },
+  { "NOREG",	NULL,		libdef_regfunc,		REGFUNC_NOREG },
+  { NULL,	NULL,		(LibDefFunc)0,		0 }
+};
+
+/* Emit C source code for library function definitions. */
+void emit_lib(BuildCtx *ctx)
+{
+  const char *fname;
+
+  if (ctx->mode == BUILD_ffdef || ctx->mode == BUILD_libdef ||
+      ctx->mode == BUILD_recdef)
+    fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
+  else if (ctx->mode == BUILD_vmdef)
+    fprintf(ctx->fp, "ffnames = {\n[0]=\"Lua\",\n\"C\",\n");
+  if (ctx->mode == BUILD_recdef)
+    fprintf(ctx->fp, "static const uint16_t recff_idmap[] = {\n0,\n0x0100");
+  recffid = ffid = FF_C+1;
+  ffasmfunc = 0;
+
+  while ((fname = *ctx->args++)) {
+    char buf[256];  /* We don't care about analyzing lines longer than that. */
+    FILE *fp;
+    if (fname[0] == '-' && fname[1] == '\0') {
+      fp = stdin;
+    } else {
+      fp = fopen(fname, "r");
+      if (!fp) {
+	fprintf(stderr, "Error: cannot open input file '%s': %s\n",
+		fname, strerror(errno));
+	exit(1);
+      }
+    }
+    modstate = 0;
+    regfunc = REGFUNC_OK;
+    while (fgets(buf, sizeof(buf), fp) != NULL) {
+      char *p;
+      /* Simplistic pre-processor. Only handles top-level #if/#endif. */
+      if (buf[0] == '#' && buf[1] == 'i' && buf[2] == 'f') {
+	int ok = 1;
+	if (!strcmp(buf, "#if LJ_52\n"))
+	  ok = LJ_52;
+	else if (!strcmp(buf, "#if LJ_HASJIT\n"))
+	  ok = LJ_HASJIT;
+	else if (!strcmp(buf, "#if LJ_HASFFI\n"))
+	  ok = LJ_HASFFI;
+	if (!ok) {
+	  int lvl = 1;
+	  while (fgets(buf, sizeof(buf), fp) != NULL) {
+	    if (buf[0] == '#' && buf[1] == 'e' && buf[2] == 'n') {
+	      if (--lvl == 0) break;
+	    } else if (buf[0] == '#' && buf[1] == 'i' && buf[2] == 'f') {
+	      lvl++;
+	    }
+	  }
+	  continue;
+	}
+      }
+      for (p = buf; (p = strstr(p, LIBDEF_PREFIX)) != NULL; ) {
+	const LibDefHandler *ldh;
+	p += sizeof(LIBDEF_PREFIX)-1;
+	for (ldh = libdef_handlers; ldh->suffix != NULL; ldh++) {
+	  size_t n, len = strlen(ldh->suffix);
+	  if (!strncmp(p, ldh->suffix, len)) {
+	    p += len;
+	    n = ldh->stop ? strcspn(p, ldh->stop) : 0;
+	    if (!p[n]) break;
+	    p[n] = '\0';
+	    ldh->func(ctx, p, ldh->arg);
+	    p += n+1;
+	    break;
+	  }
+	}
+	if (ldh->suffix == NULL) {
+	  buf[strlen(buf)-1] = '\0';
+	  fprintf(stderr, "Error: unknown library definition tag %s%s\n",
+		  LIBDEF_PREFIX, p);
+	  exit(1);
+	}
+      }
+    }
+    fclose(fp);
+    if (ctx->mode == BUILD_libdef) {
+      libdef_endmodule(ctx);
+    }
+  }
+
+  if (ctx->mode == BUILD_ffdef) {
+    fprintf(ctx->fp, "\n#undef FFDEF\n\n");
+    fprintf(ctx->fp,
+      "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
+      ffasmfunc);
+  } else if (ctx->mode == BUILD_vmdef) {
+    fprintf(ctx->fp, "}\n\n");
+  } else if (ctx->mode == BUILD_bcdef) {
+    int i;
+    fprintf(ctx->fp, "\n};\n\n");
+    fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_mode[] = {\n");
+    fprintf(ctx->fp, "BCDEF(BCMODE)\n");
+    for (i = ffasmfunc-1; i > 0; i--)
+      fprintf(ctx->fp, "BCMODE_FF,\n");
+    fprintf(ctx->fp, "BCMODE_FF\n};\n\n");
+  } else if (ctx->mode == BUILD_recdef) {
+    char *p = (char *)obuf;
+    fprintf(ctx->fp, "\n};\n\n");
+    fprintf(ctx->fp, "static const RecordFunc recff_func[] = {\n"
+	    "recff_nyi,\n"
+	    "recff_c");
+    while (*p) {
+      fprintf(ctx->fp, ",\nrecff_%s", p);
+      p += strlen(p)+1;
+    }
+    fprintf(ctx->fp, "\n};\n\n");
+  }
+}
+

+ 368 - 0
luajit.mod/luajit/src/host/buildvm_peobj.c

@@ -0,0 +1,368 @@
+/*
+** LuaJIT VM builder: PE object emitter.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Only used for building on Windows, since we cannot assume the presence
+** of a suitable assembler. The host and target byte order must match.
+*/
+
+#include "buildvm.h"
+#include "lj_bc.h"
+
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC
+
+/* Context for PE object emitter. */
+static char *strtab;
+static size_t strtabofs;
+
+/* -- PE object definitions ----------------------------------------------- */
+
+/* PE header. */
+typedef struct PEheader {
+  uint16_t arch;
+  uint16_t nsects;
+  uint32_t time;
+  uint32_t symtabofs;
+  uint32_t nsyms;
+  uint16_t opthdrsz;
+  uint16_t flags;
+} PEheader;
+
+/* PE section. */
+typedef struct PEsection {
+  char name[8];
+  uint32_t vsize;
+  uint32_t vaddr;
+  uint32_t size;
+  uint32_t ofs;
+  uint32_t relocofs;
+  uint32_t lineofs;
+  uint16_t nreloc;
+  uint16_t nline;
+  uint32_t flags;
+} PEsection;
+
+/* PE relocation. */
+typedef struct PEreloc {
+  uint32_t vaddr;
+  uint32_t symidx;
+  uint16_t type;
+} PEreloc;
+
+/* Cannot use sizeof, because it pads up to the max. alignment. */
+#define PEOBJ_RELOC_SIZE	(4+4+2)
+
+/* PE symbol table entry. */
+typedef struct PEsym {
+  union {
+    char name[8];
+    uint32_t nameref[2];
+  } n;
+  uint32_t value;
+  int16_t sect;
+  uint16_t type;
+  uint8_t scl;
+  uint8_t naux;
+} PEsym;
+
+/* PE symbol table auxiliary entry for a section. */
+typedef struct PEsymaux {
+  uint32_t size;
+  uint16_t nreloc;
+  uint16_t nline;
+  uint32_t cksum;
+  uint16_t assoc;
+  uint8_t comdatsel;
+  uint8_t unused[3];
+} PEsymaux;
+
+/* Cannot use sizeof, because it pads up to the max. alignment. */
+#define PEOBJ_SYM_SIZE	(8+4+2+2+1+1)
+
+/* PE object CPU specific defines. */
+#if LJ_TARGET_X86
+#define PEOBJ_ARCH_TARGET	0x014c
+#define PEOBJ_RELOC_REL32	0x14  /* MS: REL32, GNU: DISP32. */
+#define PEOBJ_RELOC_DIR32	0x06
+#define PEOBJ_RELOC_OFS		0
+#define PEOBJ_TEXT_FLAGS	0x60500020  /* 60=r+x, 50=align16, 20=code. */
+#elif LJ_TARGET_X64
+#define PEOBJ_ARCH_TARGET	0x8664
+#define PEOBJ_RELOC_REL32	0x04  /* MS: REL32, GNU: DISP32. */
+#define PEOBJ_RELOC_DIR32	0x02
+#define PEOBJ_RELOC_ADDR32NB	0x03
+#define PEOBJ_RELOC_OFS		0
+#define PEOBJ_TEXT_FLAGS	0x60500020  /* 60=r+x, 50=align16, 20=code. */
+#elif LJ_TARGET_PPC
+#define PEOBJ_ARCH_TARGET	0x01f2
+#define PEOBJ_RELOC_REL32	0x06
+#define PEOBJ_RELOC_DIR32	0x02
+#define PEOBJ_RELOC_OFS		(-4)
+#define PEOBJ_TEXT_FLAGS	0x60400020  /* 60=r+x, 40=align8, 20=code. */
+#endif
+
+/* Section numbers (0-based). */
+enum {
+  PEOBJ_SECT_ABS = -2,
+  PEOBJ_SECT_UNDEF = -1,
+  PEOBJ_SECT_TEXT,
+#if LJ_TARGET_X64
+  PEOBJ_SECT_PDATA,
+  PEOBJ_SECT_XDATA,
+#endif
+  PEOBJ_SECT_RDATA_Z,
+  PEOBJ_NSECTIONS
+};
+
+/* Symbol types. */
+#define PEOBJ_TYPE_NULL		0
+#define PEOBJ_TYPE_FUNC		0x20
+
+/* Symbol storage class. */
+#define PEOBJ_SCL_EXTERN	2
+#define PEOBJ_SCL_STATIC	3
+
+/* -- PE object emitter --------------------------------------------------- */
+
+/* Emit PE object symbol. */
+static void emit_peobj_sym(BuildCtx *ctx, const char *name, uint32_t value,
+			   int sect, int type, int scl)
+{
+  PEsym sym;
+  size_t len = strlen(name);
+  if (!strtab) {  /* Pass 1: only calculate string table length. */
+    if (len > 8) strtabofs += len+1;
+    return;
+  }
+  if (len <= 8) {
+    memcpy(sym.n.name, name, len);
+    memset(sym.n.name+len, 0, 8-len);
+  } else {
+    sym.n.nameref[0] = 0;
+    sym.n.nameref[1] = (uint32_t)strtabofs;
+    memcpy(strtab + strtabofs, name, len);
+    strtab[strtabofs+len] = 0;
+    strtabofs += len+1;
+  }
+  sym.value = value;
+  sym.sect = (int16_t)(sect+1);  /* 1-based section number. */
+  sym.type = (uint16_t)type;
+  sym.scl = (uint8_t)scl;
+  sym.naux = 0;
+  owrite(ctx, &sym, PEOBJ_SYM_SIZE);
+}
+
+/* Emit PE object section symbol. */
+static void emit_peobj_sym_sect(BuildCtx *ctx, PEsection *pesect, int sect)
+{
+  PEsym sym;
+  PEsymaux aux;
+  if (!strtab) return;  /* Pass 1: no output. */
+  memcpy(sym.n.name, pesect[sect].name, 8);
+  sym.value = 0;
+  sym.sect = (int16_t)(sect+1);  /* 1-based section number. */
+  sym.type = PEOBJ_TYPE_NULL;
+  sym.scl = PEOBJ_SCL_STATIC;
+  sym.naux = 1;
+  owrite(ctx, &sym, PEOBJ_SYM_SIZE);
+  memset(&aux, 0, sizeof(PEsymaux));
+  aux.size = pesect[sect].size;
+  aux.nreloc = pesect[sect].nreloc;
+  owrite(ctx, &aux, PEOBJ_SYM_SIZE);
+}
+
+/* Emit Windows PE object file. */
+void emit_peobj(BuildCtx *ctx)
+{
+  PEheader pehdr;
+  PEsection pesect[PEOBJ_NSECTIONS];
+  uint32_t sofs;
+  int i, nrsym;
+  union { uint8_t b; uint32_t u; } host_endian;
+
+  sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
+
+  /* Fill in PE sections. */
+  memset(&pesect, 0, PEOBJ_NSECTIONS*sizeof(PEsection));
+  memcpy(pesect[PEOBJ_SECT_TEXT].name, ".text", sizeof(".text")-1);
+  pesect[PEOBJ_SECT_TEXT].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_TEXT].size = (uint32_t)ctx->codesz);
+  pesect[PEOBJ_SECT_TEXT].relocofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_TEXT].nreloc = (uint16_t)ctx->nreloc) * PEOBJ_RELOC_SIZE;
+  /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
+  pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
+
+#if LJ_TARGET_X64
+  memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
+  pesect[PEOBJ_SECT_PDATA].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
+  pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
+  /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+  pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
+
+  memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
+  pesect[PEOBJ_SECT_XDATA].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2);  /* See below. */
+  pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
+  /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+  pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
+#endif
+
+  memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
+  pesect[PEOBJ_SECT_RDATA_Z].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_RDATA_Z].size = (uint32_t)strlen(ctx->dasm_ident)+1);
+  /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+  pesect[PEOBJ_SECT_RDATA_Z].flags = 0x40300040;
+
+  /* Fill in PE header. */
+  pehdr.arch = PEOBJ_ARCH_TARGET;
+  pehdr.nsects = PEOBJ_NSECTIONS;
+  pehdr.time = 0;  /* Timestamp is optional. */
+  pehdr.symtabofs = sofs;
+  pehdr.opthdrsz = 0;
+  pehdr.flags = 0;
+
+  /* Compute the size of the symbol table:
+  ** @feat.00 + nsections*2
+  ** + asm_start + nsym
+  ** + nrsym
+  */
+  nrsym = ctx->nrelocsym;
+  pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
+#if LJ_TARGET_X64
+  pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win64. */
+#endif
+
+  /* Write PE object header and all sections. */
+  owrite(ctx, &pehdr, sizeof(PEheader));
+  owrite(ctx, &pesect, sizeof(PEsection)*PEOBJ_NSECTIONS);
+
+  /* Write .text section. */
+  host_endian.u = 1;
+  if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
+#if LJ_TARGET_PPC
+    uint32_t *p = (uint32_t *)ctx->code;
+    int n = (int)(ctx->codesz >> 2);
+    for (i = 0; i < n; i++, p++)
+      *p = lj_bswap(*p);  /* Byteswap .text section. */
+#else
+    fprintf(stderr, "Error: different byte order for host and target\n");
+    exit(1);
+#endif
+  }
+  owrite(ctx, ctx->code, ctx->codesz);
+  for (i = 0; i < ctx->nreloc; i++) {
+    PEreloc reloc;
+    reloc.vaddr = (uint32_t)ctx->reloc[i].ofs + PEOBJ_RELOC_OFS;
+    reloc.symidx = 1+2+ctx->reloc[i].sym;  /* Reloc syms are after .text sym. */
+    reloc.type = ctx->reloc[i].type ? PEOBJ_RELOC_REL32 : PEOBJ_RELOC_DIR32;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
+
+#if LJ_TARGET_X64
+  { /* Write .pdata section. */
+    uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
+    uint32_t pdata[3];  /* Start of .text, end of .text and .xdata. */
+    PEreloc reloc;
+    pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
+    owrite(ctx, &pdata, sizeof(pdata));
+    pdata[0] = fcofs; pdata[1] = (uint32_t)ctx->codesz; pdata[2] = 20;
+    owrite(ctx, &pdata, sizeof(pdata));
+    reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 16; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 20; reloc.symidx = 1+2+nrsym+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
+  { /* Write .xdata section. */
+    uint16_t xdata[8+2+6];
+    PEreloc reloc;
+    xdata[0] = 0x01|0x08|0x10;  /* Ver. 1, uhandler/ehandler, prolog size 0. */
+    xdata[1] = 0x0005;  /* Number of unwind codes, no frame pointer. */
+    xdata[2] = 0x4200;  /* Stack offset 4*8+8 = aword*5. */
+    xdata[3] = 0x3000;  /* Push rbx. */
+    xdata[4] = 0x6000;  /* Push rsi. */
+    xdata[5] = 0x7000;  /* Push rdi. */
+    xdata[6] = 0x5000;  /* Push rbp. */
+    xdata[7] = 0;  /* Alignment. */
+    xdata[8] = xdata[9] = 0;  /* Relocated address of exception handler. */
+    xdata[10] = 0x01;  /* Ver. 1, no handler, prolog size 0. */
+    xdata[11] = 0x1504;  /* Number of unwind codes, fp = rbp, fpofs = 16. */
+    xdata[12] = 0x0300;  /* set_fpreg. */
+    xdata[13] = 0x0200;  /* stack offset 0*8+8 = aword*1. */
+    xdata[14] = 0x3000;  /* Push rbx. */
+    xdata[15] = 0x5000;  /* Push rbp. */
+    owrite(ctx, &xdata, sizeof(xdata));
+    reloc.vaddr = 2*8; reloc.symidx = 1+2+nrsym+2+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
+#endif
+
+  /* Write .rdata$Z section. */
+  owrite(ctx, ctx->dasm_ident, strlen(ctx->dasm_ident)+1);
+
+  /* Write symbol table. */
+  strtab = NULL;  /* 1st pass: collect string sizes. */
+  for (;;) {
+    strtabofs = 4;
+    /* Mark as SafeSEH compliant. */
+    emit_peobj_sym(ctx, "@feat.00", 1,
+		   PEOBJ_SECT_ABS, PEOBJ_TYPE_NULL, PEOBJ_SCL_STATIC);
+
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
+    for (i = 0; i < nrsym; i++)
+      emit_peobj_sym(ctx, ctx->relocsym[i], 0,
+		     PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+
+#if LJ_TARGET_X64
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
+    emit_peobj_sym(ctx, "lj_err_unwind_win64", 0,
+		   PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+#endif
+
+    emit_peobj_sym(ctx, ctx->beginsym, 0,
+		   PEOBJ_SECT_TEXT, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN);
+    for (i = 0; i < ctx->nsym; i++)
+      emit_peobj_sym(ctx, ctx->sym[i].name, (uint32_t)ctx->sym[i].ofs,
+		     PEOBJ_SECT_TEXT, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA_Z);
+
+    if (strtab)
+      break;
+    /* 2nd pass: alloc strtab, write syms and copy strings. */
+    strtab = (char *)malloc(strtabofs);
+    *(uint32_t *)strtab = (uint32_t)strtabofs;
+  }
+
+  /* Write string table. */
+  owrite(ctx, strtab, strtabofs);
+}
+
+#else
+
+void emit_peobj(BuildCtx *ctx)
+{
+  UNUSED(ctx);
+  fprintf(stderr, "Error: no PE object support for this target\n");
+  exit(1);
+}
+
+#endif

+ 428 - 0
luajit.mod/luajit/src/host/genminilua.lua

@@ -0,0 +1,428 @@
+----------------------------------------------------------------------------
+-- Lua script to generate a customized, minified version of Lua.
+-- The resulting 'minilua' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local sub, match, gsub = string.sub, string.match, string.gsub
+
+local LUA_VERSION = "5.1.5"
+local LUA_SOURCE
+
+local function usage()
+  io.stderr:write("Usage: ", arg and arg[0] or "genminilua",
+		  " lua-", LUA_VERSION, "-source-dir\n")
+  os.exit(1)
+end
+
+local function find_sources()
+  LUA_SOURCE = arg and arg[1]
+  if not LUA_SOURCE then usage() end
+  if sub(LUA_SOURCE, -1) ~= "/" then LUA_SOURCE = LUA_SOURCE.."/" end
+  local fp = io.open(LUA_SOURCE .. "lua.h")
+  if not fp then
+    LUA_SOURCE = LUA_SOURCE.."src/"
+    fp = io.open(LUA_SOURCE .. "lua.h")
+    if not fp then usage() end
+  end
+  local all = fp:read("*a")
+  fp:close()
+  if not match(all, 'LUA_RELEASE%s*"Lua '..LUA_VERSION..'"') then
+    io.stderr:write("Error: version mismatch\n")
+    usage()
+  end
+end
+
+local LUA_FILES = {
+"lmem.c", "lobject.c", "ltm.c", "lfunc.c", "ldo.c", "lstring.c", "ltable.c",
+"lgc.c", "lstate.c", "ldebug.c", "lzio.c", "lopcodes.c",
+"llex.c", "lcode.c", "lparser.c", "lvm.c", "lapi.c", "lauxlib.c",
+"lbaselib.c", "ltablib.c", "liolib.c", "loslib.c", "lstrlib.c", "linit.c",
+}
+
+local REMOVE_LIB = {}
+gsub([[
+collectgarbage dofile gcinfo getfenv getmetatable load print rawequal rawset
+select tostring xpcall
+foreach foreachi getn maxn setn
+popen tmpfile seek setvbuf __tostring
+clock date difftime execute getenv rename setlocale time tmpname
+dump gfind len reverse
+LUA_LOADLIBNAME LUA_MATHLIBNAME LUA_DBLIBNAME
+]], "%S+", function(name)
+  REMOVE_LIB[name] = true
+end)
+
+local REMOVE_EXTINC = { ["<assert.h>"] = true, ["<locale.h>"] = true, }
+
+local CUSTOM_MAIN = [[
+typedef unsigned int UB;
+static UB barg(lua_State *L,int idx){
+union{lua_Number n;U64 b;}bn;
+bn.n=lua_tonumber(L,idx)+6755399441055744.0;
+if (bn.n==0.0&&!lua_isnumber(L,idx))luaL_typerror(L,idx,"number");
+return(UB)bn.b;
+}
+#define BRET(b) lua_pushnumber(L,(lua_Number)(int)(b));return 1;
+static int tobit(lua_State *L){
+BRET(barg(L,1))}
+static int bnot(lua_State *L){
+BRET(~barg(L,1))}
+static int band(lua_State *L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b&=barg(L,i);BRET(b)}
+static int bor(lua_State *L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b|=barg(L,i);BRET(b)}
+static int bxor(lua_State *L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b^=barg(L,i);BRET(b)}
+static int lshift(lua_State *L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET(b<<n)}
+static int rshift(lua_State *L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET(b>>n)}
+static int arshift(lua_State *L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((int)b>>n)}
+static int rol(lua_State *L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((b<<n)|(b>>(32-n)))}
+static int ror(lua_State *L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((b>>n)|(b<<(32-n)))}
+static int bswap(lua_State *L){
+UB b=barg(L,1);b=(b>>24)|((b>>8)&0xff00)|((b&0xff00)<<8)|(b<<24);BRET(b)}
+static int tohex(lua_State *L){
+UB b=barg(L,1);
+int n=lua_isnone(L,2)?8:(int)barg(L,2);
+const char *hexdigits="0123456789abcdef";
+char buf[8];
+int i;
+if(n<0){n=-n;hexdigits="0123456789ABCDEF";}
+if(n>8)n=8;
+for(i=(int)n;--i>=0;){buf[i]=hexdigits[b&15];b>>=4;}
+lua_pushlstring(L,buf,(size_t)n);
+return 1;
+}
+static const struct luaL_Reg bitlib[] = {
+{"tobit",tobit},
+{"bnot",bnot},
+{"band",band},
+{"bor",bor},
+{"bxor",bxor},
+{"lshift",lshift},
+{"rshift",rshift},
+{"arshift",arshift},
+{"rol",rol},
+{"ror",ror},
+{"bswap",bswap},
+{"tohex",tohex},
+{NULL,NULL}
+};
+int main(int argc, char **argv){
+  lua_State *L = luaL_newstate();
+  int i;
+  luaL_openlibs(L);
+  luaL_register(L, "bit", bitlib);
+  if (argc < 2) return sizeof(void *);
+  lua_createtable(L, 0, 1);
+  lua_pushstring(L, argv[1]);
+  lua_rawseti(L, -2, 0);
+  lua_setglobal(L, "arg");
+  if (luaL_loadfile(L, argv[1]))
+    goto err;
+  for (i = 2; i < argc; i++)
+    lua_pushstring(L, argv[i]);
+  if (lua_pcall(L, argc - 2, 0, 0)) {
+  err:
+    fprintf(stderr, "Error: %s\n", lua_tostring(L, -1));
+    return 1;
+  }
+  lua_close(L);
+  return 0;
+}
+]]
+
+local function read_sources()
+  local t = {}
+  for i, name in ipairs(LUA_FILES) do
+    local fp = assert(io.open(LUA_SOURCE..name, "r"))
+    t[i] = fp:read("*a")
+    assert(fp:close())
+  end
+  t[#t+1] = CUSTOM_MAIN
+  return table.concat(t)
+end
+
+local includes = {}
+
+local function merge_includes(src)
+  return gsub(src, '#include%s*"([^"]*)"%s*\n', function(name)
+    if includes[name] then return "" end
+    includes[name] = true
+    local fp = assert(io.open(LUA_SOURCE..name, "r"))
+    local src = fp:read("*a")
+    assert(fp:close())
+    src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
+    src = gsub(src, "#endif%s*$", "")
+    return merge_includes(src)
+  end)
+end
+
+local function get_license(src)
+  return match(src, "/%*+\n%* Copyright %(.-%*/\n")
+end
+
+local function fold_lines(src)
+  return gsub(src, "\\\n", " ")
+end
+
+local strings = {}
+
+local function save_str(str)
+  local n = #strings+1
+  strings[n] = str
+  return "\1"..n.."\2"
+end
+
+local function save_strings(src)
+  src = gsub(src, '"[^"\n]*"', save_str)
+  return gsub(src, "'[^'\n]*'", save_str)
+end
+
+local function restore_strings(src)
+  return gsub(src, "\1(%d+)\2", function(numstr)
+    return strings[tonumber(numstr)]
+  end)
+end
+
+local function def_istrue(def)
+  return def == "INT_MAX > 2147483640L" or
+	 def == "LUAI_BITSINT >= 32" or
+	 def == "SIZE_Bx < LUAI_BITSINT-1" or
+	 def == "cast" or
+	 def == "defined(LUA_CORE)" or
+	 def == "MINSTRTABSIZE" or
+	 def == "LUA_MINBUFFER" or
+	 def == "HARDSTACKTESTS" or
+	 def == "UNUSED"
+end
+
+local head, defs = {[[
+#ifdef _MSC_VER
+typedef unsigned __int64 U64;
+#else
+typedef unsigned long long U64;
+#endif
+int _CRT_glob = 0;
+]]}, {}
+
+local function preprocess(src)
+  local t = { match(src, "^(.-)#") }
+  local lvl, on, oldon = 0, true, {}
+  for pp, def, txt in string.gmatch(src, "#(%w+) *([^\n]*)\n([^#]*)") do
+    if pp == "if" or pp == "ifdef" or pp == "ifndef" then
+      lvl = lvl + 1
+      oldon[lvl] = on
+      on = def_istrue(def)
+    elseif pp == "else" then
+      if oldon[lvl] then
+	if on == false then on = true else on = false end
+      end
+    elseif pp == "elif" then
+      if oldon[lvl] then
+	on = def_istrue(def)
+      end
+    elseif pp == "endif" then
+      on = oldon[lvl]
+      lvl = lvl - 1
+    elseif on then
+      if pp == "include" then
+	if not head[def] and not REMOVE_EXTINC[def] then
+	  head[def] = true
+	  head[#head+1] = "#include "..def.."\n"
+	end
+      elseif pp == "define" then
+	local k, sp, v = match(def, "([%w_]+)(%s*)(.*)")
+	if k and not (sp == "" and sub(v, 1, 1) == "(") then
+	  defs[k] = gsub(v, "%a[%w_]*", function(tok)
+	    return defs[tok] or tok
+	  end)
+	else
+	  t[#t+1] = "#define "..def.."\n"
+	end
+      elseif pp ~= "undef" then
+	error("unexpected directive: "..pp.." "..def)
+      end
+    end
+    if on then t[#t+1] = txt end
+  end
+  return gsub(table.concat(t), "%a[%w_]*", function(tok)
+    return defs[tok] or tok
+  end)
+end
+
+local function merge_header(src, license)
+  local hdr = string.format([[
+/* This is a heavily customized and minimized copy of Lua %s. */
+/* It's only used to build LuaJIT. It does NOT have all standard functions! */
+]], LUA_VERSION)
+  return hdr..license..table.concat(head)..src
+end
+
+local function strip_unused1(src)
+  return gsub(src, '(  {"?([%w_]+)"?,%s+%a[%w_]*},\n)', function(line, func)
+    return REMOVE_LIB[func] and "" or line
+  end)
+end
+
+local function strip_unused2(src)
+  return gsub(src, "Symbolic Execution.-}=", "")
+end
+
+local function strip_unused3(src)
+  src = gsub(src, "extern", "static")
+  src = gsub(src, "\nstatic([^\n]-)%(([^)]*)%)%(", "\nstatic%1 %2(")
+  src = gsub(src, "#define lua_assert[^\n]*\n", "")
+  src = gsub(src, "lua_assert%b();?", "")
+  src = gsub(src, "default:\n}", "default:;\n}")
+  src = gsub(src, "lua_lock%b();", "")
+  src = gsub(src, "lua_unlock%b();", "")
+  src = gsub(src, "luai_threadyield%b();", "")
+  src = gsub(src, "luai_userstateopen%b();", "{}")
+  src = gsub(src, "luai_userstate%w+%b();", "")
+  src = gsub(src, "%(%(c==.*luaY_parser%)", "luaY_parser")
+  src = gsub(src, "trydecpoint%(ls,seminfo%)",
+		  "luaX_lexerror(ls,\"malformed number\",TK_NUMBER)")
+  src = gsub(src, "int c=luaZ_lookahead%b();", "")
+  src = gsub(src, "luaL_register%(L,[^,]*,co_funcs%);\nreturn 2;",
+		  "return 1;")
+  src = gsub(src, "getfuncname%b():", "NULL:")
+  src = gsub(src, "getobjname%b():", "NULL:")
+  src = gsub(src, "if%([^\n]*hookmask[^\n]*%)\n[^\n]*\n", "")
+  src = gsub(src, "if%([^\n]*hookmask[^\n]*%)%b{}\n", "")
+  src = gsub(src, "if%([^\n]*hookmask[^\n]*&&\n[^\n]*%b{}\n", "")
+  src = gsub(src, "(twoto%b()%()", "%1(size_t)")
+  src = gsub(src, "i<sizenode", "i<(int)sizenode")
+  return gsub(src, "\n\n+", "\n")
+end
+
+local function strip_comments(src)
+  return gsub(src, "/%*.-%*/", " ")
+end
+
+local function strip_whitespace(src)
+  src = gsub(src, "^%s+", "")
+  src = gsub(src, "%s*\n%s*", "\n")
+  src = gsub(src, "[ \t]+", " ")
+  src = gsub(src, "(%W) ", "%1")
+  return gsub(src, " (%W)", "%1")
+end
+
+local function rename_tokens1(src)
+  src = gsub(src, "getline", "getline_")
+  src = gsub(src, "struct ([%w_]+)", "ZX%1")
+  return gsub(src, "union ([%w_]+)", "ZY%1")
+end
+
+local function rename_tokens2(src)
+  src = gsub(src, "ZX([%w_]+)", "struct %1")
+  return gsub(src, "ZY([%w_]+)", "union %1")
+end
+
+local function func_gather(src)
+  local nodes, list = {}, {}
+  local pos, len = 1, #src
+  while pos < len do
+    local d, w = match(src, "^(#define ([%w_]+)[^\n]*\n)", pos)
+    if d then
+      local n = #list+1
+      list[n] = d
+      nodes[w] = n
+    else
+      local s
+      d, w, s = match(src, "^(([%w_]+)[^\n]*([{;])\n)", pos)
+      if not d then
+	d, w, s = match(src, "^(([%w_]+)[^(]*%b()([{;])\n)", pos)
+	if not d then d = match(src, "^[^\n]*\n", pos) end
+      end
+      if s == "{" then
+	d = d..sub(match(src, "^%b{}[^;\n]*;?\n", pos+#d-2), 3)
+	if sub(d, -2) == "{\n" then
+	  d = d..sub(match(src, "^%b{}[^;\n]*;?\n", pos+#d-2), 3)
+	end
+      end
+      local k, v = nil, d
+      if w == "typedef" then
+	if match(d, "^typedef enum") then
+	  head[#head+1] = d
+	else
+	  k = match(d, "([%w_]+);\n$")
+	  if not k then k = match(d, "^.-%(.-([%w_]+)%)%(") end
+	end
+      elseif w == "enum" then
+	head[#head+1] = v
+      elseif w ~= nil then
+	k = match(d, "^[^\n]-([%w_]+)[(%[=]")
+	if k then
+	  if w ~= "static" and k ~= "main" then v = "static "..d end
+	else
+	  k = w
+	end
+      end
+      if w and k then
+	local o = nodes[k]
+	if o then nodes["*"..k] = o end
+	local n = #list+1
+	list[n] = v
+	nodes[k] = n
+      end
+    end
+    pos = pos + #d
+  end
+  return nodes, list
+end
+
+local function func_visit(nodes, list, used, n)
+  local i = nodes[n]
+  for m in string.gmatch(list[i], "[%w_]+") do
+    if nodes[m] then
+      local j = used[m]
+      if not j then
+	used[m] = i
+	func_visit(nodes, list, used, m)
+      elseif i < j then
+	used[m] = i
+      end
+    end
+  end
+end
+
+local function func_collect(src)
+  local nodes, list = func_gather(src)
+  local used = {}
+  func_visit(nodes, list, used, "main")
+  for n,i in pairs(nodes) do
+    local j = used[n]
+    if j and j < i then used["*"..n] = j end
+  end
+  for n,i in pairs(nodes) do
+    if not used[n] then list[i] = "" end
+  end
+  return table.concat(list)
+end
+
+find_sources()
+local src = read_sources()
+src = merge_includes(src)
+local license = get_license(src)
+src = fold_lines(src)
+src = strip_unused1(src)
+src = save_strings(src)
+src = strip_unused2(src)
+src = strip_comments(src)
+src = preprocess(src)
+src = strip_whitespace(src)
+src = strip_unused3(src)
+src = rename_tokens1(src)
+src = func_collect(src)
+src = rename_tokens2(src)
+src = restore_strings(src)
+src = merge_header(src, license)
+io.write(src)

+ 7770 - 0
luajit.mod/luajit/src/host/minilua.c

@@ -0,0 +1,7770 @@
+/* This is a heavily customized and minimized copy of Lua 5.1.5. */
+/* It's only used to build LuaJIT. It does NOT have all standard functions! */
+/******************************************************************************
+* Copyright (C) 1994-2012 Lua.org, PUC-Rio.  All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+#ifdef _MSC_VER
+typedef unsigned __int64 U64;
+#else
+typedef unsigned long long U64;
+#endif
+int _CRT_glob = 0;
+#include <stddef.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <math.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+#include <errno.h>
+#include <time.h>
+typedef enum{
+TM_INDEX,
+TM_NEWINDEX,
+TM_GC,
+TM_MODE,
+TM_EQ,
+TM_ADD,
+TM_SUB,
+TM_MUL,
+TM_DIV,
+TM_MOD,
+TM_POW,
+TM_UNM,
+TM_LEN,
+TM_LT,
+TM_LE,
+TM_CONCAT,
+TM_CALL,
+TM_N
+}TMS;
+enum OpMode{iABC,iABx,iAsBx};
+typedef enum{
+OP_MOVE,
+OP_LOADK,
+OP_LOADBOOL,
+OP_LOADNIL,
+OP_GETUPVAL,
+OP_GETGLOBAL,
+OP_GETTABLE,
+OP_SETGLOBAL,
+OP_SETUPVAL,
+OP_SETTABLE,
+OP_NEWTABLE,
+OP_SELF,
+OP_ADD,
+OP_SUB,
+OP_MUL,
+OP_DIV,
+OP_MOD,
+OP_POW,
+OP_UNM,
+OP_NOT,
+OP_LEN,
+OP_CONCAT,
+OP_JMP,
+OP_EQ,
+OP_LT,
+OP_LE,
+OP_TEST,
+OP_TESTSET,
+OP_CALL,
+OP_TAILCALL,
+OP_RETURN,
+OP_FORLOOP,
+OP_FORPREP,
+OP_TFORLOOP,
+OP_SETLIST,
+OP_CLOSE,
+OP_CLOSURE,
+OP_VARARG
+}OpCode;
+enum OpArgMask{
+OpArgN,
+OpArgU,
+OpArgR,
+OpArgK
+};
+typedef enum{
+VVOID,
+VNIL,
+VTRUE,
+VFALSE,
+VK,
+VKNUM,
+VLOCAL,
+VUPVAL,
+VGLOBAL,
+VINDEXED,
+VJMP,
+VRELOCABLE,
+VNONRELOC,
+VCALL,
+VVARARG
+}expkind;
+enum RESERVED{
+TK_AND=257,TK_BREAK,
+TK_DO,TK_ELSE,TK_ELSEIF,TK_END,TK_FALSE,TK_FOR,TK_FUNCTION,
+TK_IF,TK_IN,TK_LOCAL,TK_NIL,TK_NOT,TK_OR,TK_REPEAT,
+TK_RETURN,TK_THEN,TK_TRUE,TK_UNTIL,TK_WHILE,
+TK_CONCAT,TK_DOTS,TK_EQ,TK_GE,TK_LE,TK_NE,TK_NUMBER,
+TK_NAME,TK_STRING,TK_EOS
+};
+typedef enum BinOpr{
+OPR_ADD,OPR_SUB,OPR_MUL,OPR_DIV,OPR_MOD,OPR_POW,
+OPR_CONCAT,
+OPR_NE,OPR_EQ,
+OPR_LT,OPR_LE,OPR_GT,OPR_GE,
+OPR_AND,OPR_OR,
+OPR_NOBINOPR
+}BinOpr;
+typedef enum UnOpr{OPR_MINUS,OPR_NOT,OPR_LEN,OPR_NOUNOPR}UnOpr;
+#define LUA_QL(x)"'"x"'"
+#define luai_apicheck(L,o){(void)L;}
+#define lua_number2str(s,n)sprintf((s),"%.14g",(n))
+#define lua_str2number(s,p)strtod((s),(p))
+#define luai_numadd(a,b)((a)+(b))
+#define luai_numsub(a,b)((a)-(b))
+#define luai_nummul(a,b)((a)*(b))
+#define luai_numdiv(a,b)((a)/(b))
+#define luai_nummod(a,b)((a)-floor((a)/(b))*(b))
+#define luai_numpow(a,b)(pow(a,b))
+#define luai_numunm(a)(-(a))
+#define luai_numeq(a,b)((a)==(b))
+#define luai_numlt(a,b)((a)<(b))
+#define luai_numle(a,b)((a)<=(b))
+#define luai_numisnan(a)(!luai_numeq((a),(a)))
+#define lua_number2int(i,d)((i)=(int)(d))
+#define lua_number2integer(i,d)((i)=(lua_Integer)(d))
+#define LUAI_THROW(L,c)longjmp((c)->b,1)
+#define LUAI_TRY(L,c,a)if(setjmp((c)->b)==0){a}
+#define lua_pclose(L,file)((void)((void)L,file),0)
+#define lua_upvalueindex(i)((-10002)-(i))
+typedef struct lua_State lua_State;
+typedef int(*lua_CFunction)(lua_State*L);
+typedef const char*(*lua_Reader)(lua_State*L,void*ud,size_t*sz);
+typedef void*(*lua_Alloc)(void*ud,void*ptr,size_t osize,size_t nsize);
+typedef double lua_Number;
+typedef ptrdiff_t lua_Integer;
+static void lua_settop(lua_State*L,int idx);
+static int lua_type(lua_State*L,int idx);
+static const char* lua_tolstring(lua_State*L,int idx,size_t*len);
+static size_t lua_objlen(lua_State*L,int idx);
+static void lua_pushlstring(lua_State*L,const char*s,size_t l);
+static void lua_pushcclosure(lua_State*L,lua_CFunction fn,int n);
+static void lua_createtable(lua_State*L,int narr,int nrec);
+static void lua_setfield(lua_State*L,int idx,const char*k);
+#define lua_pop(L,n)lua_settop(L,-(n)-1)
+#define lua_newtable(L)lua_createtable(L,0,0)
+#define lua_pushcfunction(L,f)lua_pushcclosure(L,(f),0)
+#define lua_strlen(L,i)lua_objlen(L,(i))
+#define lua_isfunction(L,n)(lua_type(L,(n))==6)
+#define lua_istable(L,n)(lua_type(L,(n))==5)
+#define lua_isnil(L,n)(lua_type(L,(n))==0)
+#define lua_isboolean(L,n)(lua_type(L,(n))==1)
+#define lua_isnone(L,n)(lua_type(L,(n))==(-1))
+#define lua_isnoneornil(L,n)(lua_type(L,(n))<=0)
+#define lua_pushliteral(L,s)lua_pushlstring(L,""s,(sizeof(s)/sizeof(char))-1)
+#define lua_setglobal(L,s)lua_setfield(L,(-10002),(s))
+#define lua_tostring(L,i)lua_tolstring(L,(i),NULL)
+typedef struct lua_Debug lua_Debug;
+typedef void(*lua_Hook)(lua_State*L,lua_Debug*ar);
+struct lua_Debug{
+int event;
+const char*name;
+const char*namewhat;
+const char*what;
+const char*source;
+int currentline;
+int nups;
+int linedefined;
+int lastlinedefined;
+char short_src[60];
+int i_ci;
+};
+typedef unsigned int lu_int32;
+typedef size_t lu_mem;
+typedef ptrdiff_t l_mem;
+typedef unsigned char lu_byte;
+#define IntPoint(p)((unsigned int)(lu_mem)(p))
+typedef union{double u;void*s;long l;}L_Umaxalign;
+typedef double l_uacNumber;
+#define check_exp(c,e)(e)
+#define UNUSED(x)((void)(x))
+#define cast(t,exp)((t)(exp))
+#define cast_byte(i)cast(lu_byte,(i))
+#define cast_num(i)cast(lua_Number,(i))
+#define cast_int(i)cast(int,(i))
+typedef lu_int32 Instruction;
+#define condhardstacktests(x)((void)0)
+typedef union GCObject GCObject;
+typedef struct GCheader{
+GCObject*next;lu_byte tt;lu_byte marked;
+}GCheader;
+typedef union{
+GCObject*gc;
+void*p;
+lua_Number n;
+int b;
+}Value;
+typedef struct lua_TValue{
+Value value;int tt;
+}TValue;
+#define ttisnil(o)(ttype(o)==0)
+#define ttisnumber(o)(ttype(o)==3)
+#define ttisstring(o)(ttype(o)==4)
+#define ttistable(o)(ttype(o)==5)
+#define ttisfunction(o)(ttype(o)==6)
+#define ttisboolean(o)(ttype(o)==1)
+#define ttisuserdata(o)(ttype(o)==7)
+#define ttisthread(o)(ttype(o)==8)
+#define ttislightuserdata(o)(ttype(o)==2)
+#define ttype(o)((o)->tt)
+#define gcvalue(o)check_exp(iscollectable(o),(o)->value.gc)
+#define pvalue(o)check_exp(ttislightuserdata(o),(o)->value.p)
+#define nvalue(o)check_exp(ttisnumber(o),(o)->value.n)
+#define rawtsvalue(o)check_exp(ttisstring(o),&(o)->value.gc->ts)
+#define tsvalue(o)(&rawtsvalue(o)->tsv)
+#define rawuvalue(o)check_exp(ttisuserdata(o),&(o)->value.gc->u)
+#define uvalue(o)(&rawuvalue(o)->uv)
+#define clvalue(o)check_exp(ttisfunction(o),&(o)->value.gc->cl)
+#define hvalue(o)check_exp(ttistable(o),&(o)->value.gc->h)
+#define bvalue(o)check_exp(ttisboolean(o),(o)->value.b)
+#define thvalue(o)check_exp(ttisthread(o),&(o)->value.gc->th)
+#define l_isfalse(o)(ttisnil(o)||(ttisboolean(o)&&bvalue(o)==0))
+#define checkconsistency(obj)
+#define checkliveness(g,obj)
+#define setnilvalue(obj)((obj)->tt=0)
+#define setnvalue(obj,x){TValue*i_o=(obj);i_o->value.n=(x);i_o->tt=3;}
+#define setbvalue(obj,x){TValue*i_o=(obj);i_o->value.b=(x);i_o->tt=1;}
+#define setsvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=4;checkliveness(G(L),i_o);}
+#define setuvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=7;checkliveness(G(L),i_o);}
+#define setthvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=8;checkliveness(G(L),i_o);}
+#define setclvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=6;checkliveness(G(L),i_o);}
+#define sethvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=5;checkliveness(G(L),i_o);}
+#define setptvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=(8+1);checkliveness(G(L),i_o);}
+#define setobj(L,obj1,obj2){const TValue*o2=(obj2);TValue*o1=(obj1);o1->value=o2->value;o1->tt=o2->tt;checkliveness(G(L),o1);}
+#define setttype(obj,tt)(ttype(obj)=(tt))
+#define iscollectable(o)(ttype(o)>=4)
+typedef TValue*StkId;
+typedef union TString{
+L_Umaxalign dummy;
+struct{
+GCObject*next;lu_byte tt;lu_byte marked;
+lu_byte reserved;
+unsigned int hash;
+size_t len;
+}tsv;
+}TString;
+#define getstr(ts)cast(const char*,(ts)+1)
+#define svalue(o)getstr(rawtsvalue(o))
+typedef union Udata{
+L_Umaxalign dummy;
+struct{
+GCObject*next;lu_byte tt;lu_byte marked;
+struct Table*metatable;
+struct Table*env;
+size_t len;
+}uv;
+}Udata;
+typedef struct Proto{
+GCObject*next;lu_byte tt;lu_byte marked;
+TValue*k;
+Instruction*code;
+struct Proto**p;
+int*lineinfo;
+struct LocVar*locvars;
+TString**upvalues;
+TString*source;
+int sizeupvalues;
+int sizek;
+int sizecode;
+int sizelineinfo;
+int sizep;
+int sizelocvars;
+int linedefined;
+int lastlinedefined;
+GCObject*gclist;
+lu_byte nups;
+lu_byte numparams;
+lu_byte is_vararg;
+lu_byte maxstacksize;
+}Proto;
+typedef struct LocVar{
+TString*varname;
+int startpc;
+int endpc;
+}LocVar;
+typedef struct UpVal{
+GCObject*next;lu_byte tt;lu_byte marked;
+TValue*v;
+union{
+TValue value;
+struct{
+struct UpVal*prev;
+struct UpVal*next;
+}l;
+}u;
+}UpVal;
+typedef struct CClosure{
+GCObject*next;lu_byte tt;lu_byte marked;lu_byte isC;lu_byte nupvalues;GCObject*gclist;struct Table*env;
+lua_CFunction f;
+TValue upvalue[1];
+}CClosure;
+typedef struct LClosure{
+GCObject*next;lu_byte tt;lu_byte marked;lu_byte isC;lu_byte nupvalues;GCObject*gclist;struct Table*env;
+struct Proto*p;
+UpVal*upvals[1];
+}LClosure;
+typedef union Closure{
+CClosure c;
+LClosure l;
+}Closure;
+#define iscfunction(o)(ttype(o)==6&&clvalue(o)->c.isC)
+typedef union TKey{
+struct{
+Value value;int tt;
+struct Node*next;
+}nk;
+TValue tvk;
+}TKey;
+typedef struct Node{
+TValue i_val;
+TKey i_key;
+}Node;
+typedef struct Table{
+GCObject*next;lu_byte tt;lu_byte marked;
+lu_byte flags;
+lu_byte lsizenode;
+struct Table*metatable;
+TValue*array;
+Node*node;
+Node*lastfree;
+GCObject*gclist;
+int sizearray;
+}Table;
+#define lmod(s,size)(check_exp((size&(size-1))==0,(cast(int,(s)&((size)-1)))))
+#define twoto(x)((size_t)1<<(x))
+#define sizenode(t)(twoto((t)->lsizenode))
+static const TValue luaO_nilobject_;
+#define ceillog2(x)(luaO_log2((x)-1)+1)
+static int luaO_log2(unsigned int x);
+#define gfasttm(g,et,e)((et)==NULL?NULL:((et)->flags&(1u<<(e)))?NULL:luaT_gettm(et,e,(g)->tmname[e]))
+#define fasttm(l,et,e)gfasttm(G(l),et,e)
+static const TValue*luaT_gettm(Table*events,TMS event,TString*ename);
+#define luaM_reallocv(L,b,on,n,e)((cast(size_t,(n)+1)<=((size_t)(~(size_t)0)-2)/(e))?luaM_realloc_(L,(b),(on)*(e),(n)*(e)):luaM_toobig(L))
+#define luaM_freemem(L,b,s)luaM_realloc_(L,(b),(s),0)
+#define luaM_free(L,b)luaM_realloc_(L,(b),sizeof(*(b)),0)
+#define luaM_freearray(L,b,n,t)luaM_reallocv(L,(b),n,0,sizeof(t))
+#define luaM_malloc(L,t)luaM_realloc_(L,NULL,0,(t))
+#define luaM_new(L,t)cast(t*,luaM_malloc(L,sizeof(t)))
+#define luaM_newvector(L,n,t)cast(t*,luaM_reallocv(L,NULL,0,n,sizeof(t)))
+#define luaM_growvector(L,v,nelems,size,t,limit,e)if((nelems)+1>(size))((v)=cast(t*,luaM_growaux_(L,v,&(size),sizeof(t),limit,e)))
+#define luaM_reallocvector(L,v,oldn,n,t)((v)=cast(t*,luaM_reallocv(L,v,oldn,n,sizeof(t))))
+static void*luaM_realloc_(lua_State*L,void*block,size_t oldsize,
+size_t size);
+static void*luaM_toobig(lua_State*L);
+static void*luaM_growaux_(lua_State*L,void*block,int*size,
+size_t size_elem,int limit,
+const char*errormsg);
+typedef struct Zio ZIO;
+#define char2int(c)cast(int,cast(unsigned char,(c)))
+#define zgetc(z)(((z)->n--)>0?char2int(*(z)->p++):luaZ_fill(z))
+typedef struct Mbuffer{
+char*buffer;
+size_t n;
+size_t buffsize;
+}Mbuffer;
+#define luaZ_initbuffer(L,buff)((buff)->buffer=NULL,(buff)->buffsize=0)
+#define luaZ_buffer(buff)((buff)->buffer)
+#define luaZ_sizebuffer(buff)((buff)->buffsize)
+#define luaZ_bufflen(buff)((buff)->n)
+#define luaZ_resetbuffer(buff)((buff)->n=0)
+#define luaZ_resizebuffer(L,buff,size)(luaM_reallocvector(L,(buff)->buffer,(buff)->buffsize,size,char),(buff)->buffsize=size)
+#define luaZ_freebuffer(L,buff)luaZ_resizebuffer(L,buff,0)
+struct Zio{
+size_t n;
+const char*p;
+lua_Reader reader;
+void*data;
+lua_State*L;
+};
+static int luaZ_fill(ZIO*z);
+struct lua_longjmp;
+#define gt(L)(&L->l_gt)
+#define registry(L)(&G(L)->l_registry)
+typedef struct stringtable{
+GCObject**hash;
+lu_int32 nuse;
+int size;
+}stringtable;
+typedef struct CallInfo{
+StkId base;
+StkId func;
+StkId top;
+const Instruction*savedpc;
+int nresults;
+int tailcalls;
+}CallInfo;
+#define curr_func(L)(clvalue(L->ci->func))
+#define ci_func(ci)(clvalue((ci)->func))
+#define f_isLua(ci)(!ci_func(ci)->c.isC)
+#define isLua(ci)(ttisfunction((ci)->func)&&f_isLua(ci))
+typedef struct global_State{
+stringtable strt;
+lua_Alloc frealloc;
+void*ud;
+lu_byte currentwhite;
+lu_byte gcstate;
+int sweepstrgc;
+GCObject*rootgc;
+GCObject**sweepgc;
+GCObject*gray;
+GCObject*grayagain;
+GCObject*weak;
+GCObject*tmudata;
+Mbuffer buff;
+lu_mem GCthreshold;
+lu_mem totalbytes;
+lu_mem estimate;
+lu_mem gcdept;
+int gcpause;
+int gcstepmul;
+lua_CFunction panic;
+TValue l_registry;
+struct lua_State*mainthread;
+UpVal uvhead;
+struct Table*mt[(8+1)];
+TString*tmname[TM_N];
+}global_State;
+struct lua_State{
+GCObject*next;lu_byte tt;lu_byte marked;
+lu_byte status;
+StkId top;
+StkId base;
+global_State*l_G;
+CallInfo*ci;
+const Instruction*savedpc;
+StkId stack_last;
+StkId stack;
+CallInfo*end_ci;
+CallInfo*base_ci;
+int stacksize;
+int size_ci;
+unsigned short nCcalls;
+unsigned short baseCcalls;
+lu_byte hookmask;
+lu_byte allowhook;
+int basehookcount;
+int hookcount;
+lua_Hook hook;
+TValue l_gt;
+TValue env;
+GCObject*openupval;
+GCObject*gclist;
+struct lua_longjmp*errorJmp;
+ptrdiff_t errfunc;
+};
+#define G(L)(L->l_G)
+union GCObject{
+GCheader gch;
+union TString ts;
+union Udata u;
+union Closure cl;
+struct Table h;
+struct Proto p;
+struct UpVal uv;
+struct lua_State th;
+};
+#define rawgco2ts(o)check_exp((o)->gch.tt==4,&((o)->ts))
+#define gco2ts(o)(&rawgco2ts(o)->tsv)
+#define rawgco2u(o)check_exp((o)->gch.tt==7,&((o)->u))
+#define gco2u(o)(&rawgco2u(o)->uv)
+#define gco2cl(o)check_exp((o)->gch.tt==6,&((o)->cl))
+#define gco2h(o)check_exp((o)->gch.tt==5,&((o)->h))
+#define gco2p(o)check_exp((o)->gch.tt==(8+1),&((o)->p))
+#define gco2uv(o)check_exp((o)->gch.tt==(8+2),&((o)->uv))
+#define ngcotouv(o)check_exp((o)==NULL||(o)->gch.tt==(8+2),&((o)->uv))
+#define gco2th(o)check_exp((o)->gch.tt==8,&((o)->th))
+#define obj2gco(v)(cast(GCObject*,(v)))
+static void luaE_freethread(lua_State*L,lua_State*L1);
+#define pcRel(pc,p)(cast(int,(pc)-(p)->code)-1)
+#define getline_(f,pc)(((f)->lineinfo)?(f)->lineinfo[pc]:0)
+#define resethookcount(L)(L->hookcount=L->basehookcount)
+static void luaG_typeerror(lua_State*L,const TValue*o,
+const char*opname);
+static void luaG_runerror(lua_State*L,const char*fmt,...);
+#define luaD_checkstack(L,n)if((char*)L->stack_last-(char*)L->top<=(n)*(int)sizeof(TValue))luaD_growstack(L,n);else condhardstacktests(luaD_reallocstack(L,L->stacksize-5-1));
+#define incr_top(L){luaD_checkstack(L,1);L->top++;}
+#define savestack(L,p)((char*)(p)-(char*)L->stack)
+#define restorestack(L,n)((TValue*)((char*)L->stack+(n)))
+#define saveci(L,p)((char*)(p)-(char*)L->base_ci)
+#define restoreci(L,n)((CallInfo*)((char*)L->base_ci+(n)))
+typedef void(*Pfunc)(lua_State*L,void*ud);
+static int luaD_poscall(lua_State*L,StkId firstResult);
+static void luaD_reallocCI(lua_State*L,int newsize);
+static void luaD_reallocstack(lua_State*L,int newsize);
+static void luaD_growstack(lua_State*L,int n);
+static void luaD_throw(lua_State*L,int errcode);
+static void*luaM_growaux_(lua_State*L,void*block,int*size,size_t size_elems,
+int limit,const char*errormsg){
+void*newblock;
+int newsize;
+if(*size>=limit/2){
+if(*size>=limit)
+luaG_runerror(L,errormsg);
+newsize=limit;
+}
+else{
+newsize=(*size)*2;
+if(newsize<4)
+newsize=4;
+}
+newblock=luaM_reallocv(L,block,*size,newsize,size_elems);
+*size=newsize;
+return newblock;
+}
+static void*luaM_toobig(lua_State*L){
+luaG_runerror(L,"memory allocation error: block too big");
+return NULL;
+}
+static void*luaM_realloc_(lua_State*L,void*block,size_t osize,size_t nsize){
+global_State*g=G(L);
+block=(*g->frealloc)(g->ud,block,osize,nsize);
+if(block==NULL&&nsize>0)
+luaD_throw(L,4);
+g->totalbytes=(g->totalbytes-osize)+nsize;
+return block;
+}
+#define resetbits(x,m)((x)&=cast(lu_byte,~(m)))
+#define setbits(x,m)((x)|=(m))
+#define testbits(x,m)((x)&(m))
+#define bitmask(b)(1<<(b))
+#define bit2mask(b1,b2)(bitmask(b1)|bitmask(b2))
+#define l_setbit(x,b)setbits(x,bitmask(b))
+#define resetbit(x,b)resetbits(x,bitmask(b))
+#define testbit(x,b)testbits(x,bitmask(b))
+#define set2bits(x,b1,b2)setbits(x,(bit2mask(b1,b2)))
+#define reset2bits(x,b1,b2)resetbits(x,(bit2mask(b1,b2)))
+#define test2bits(x,b1,b2)testbits(x,(bit2mask(b1,b2)))
+#define iswhite(x)test2bits((x)->gch.marked,0,1)
+#define isblack(x)testbit((x)->gch.marked,2)
+#define isgray(x)(!isblack(x)&&!iswhite(x))
+#define otherwhite(g)(g->currentwhite^bit2mask(0,1))
+#define isdead(g,v)((v)->gch.marked&otherwhite(g)&bit2mask(0,1))
+#define changewhite(x)((x)->gch.marked^=bit2mask(0,1))
+#define gray2black(x)l_setbit((x)->gch.marked,2)
+#define valiswhite(x)(iscollectable(x)&&iswhite(gcvalue(x)))
+#define luaC_white(g)cast(lu_byte,(g)->currentwhite&bit2mask(0,1))
+#define luaC_checkGC(L){condhardstacktests(luaD_reallocstack(L,L->stacksize-5-1));if(G(L)->totalbytes>=G(L)->GCthreshold)luaC_step(L);}
+#define luaC_barrier(L,p,v){if(valiswhite(v)&&isblack(obj2gco(p)))luaC_barrierf(L,obj2gco(p),gcvalue(v));}
+#define luaC_barriert(L,t,v){if(valiswhite(v)&&isblack(obj2gco(t)))luaC_barrierback(L,t);}
+#define luaC_objbarrier(L,p,o){if(iswhite(obj2gco(o))&&isblack(obj2gco(p)))luaC_barrierf(L,obj2gco(p),obj2gco(o));}
+#define luaC_objbarriert(L,t,o){if(iswhite(obj2gco(o))&&isblack(obj2gco(t)))luaC_barrierback(L,t);}
+static void luaC_step(lua_State*L);
+static void luaC_link(lua_State*L,GCObject*o,lu_byte tt);
+static void luaC_linkupval(lua_State*L,UpVal*uv);
+static void luaC_barrierf(lua_State*L,GCObject*o,GCObject*v);
+static void luaC_barrierback(lua_State*L,Table*t);
+#define sizestring(s)(sizeof(union TString)+((s)->len+1)*sizeof(char))
+#define sizeudata(u)(sizeof(union Udata)+(u)->len)
+#define luaS_new(L,s)(luaS_newlstr(L,s,strlen(s)))
+#define luaS_newliteral(L,s)(luaS_newlstr(L,""s,(sizeof(s)/sizeof(char))-1))
+#define luaS_fix(s)l_setbit((s)->tsv.marked,5)
+static TString*luaS_newlstr(lua_State*L,const char*str,size_t l);
+#define tostring(L,o)((ttype(o)==4)||(luaV_tostring(L,o)))
+#define tonumber(o,n)(ttype(o)==3||(((o)=luaV_tonumber(o,n))!=NULL))
+#define equalobj(L,o1,o2)(ttype(o1)==ttype(o2)&&luaV_equalval(L,o1,o2))
+static int luaV_equalval(lua_State*L,const TValue*t1,const TValue*t2);
+static const TValue*luaV_tonumber(const TValue*obj,TValue*n);
+static int luaV_tostring(lua_State*L,StkId obj);
+static void luaV_execute(lua_State*L,int nexeccalls);
+static void luaV_concat(lua_State*L,int total,int last);
+static const TValue luaO_nilobject_={{NULL},0};
+static int luaO_int2fb(unsigned int x){
+int e=0;
+while(x>=16){
+x=(x+1)>>1;
+e++;
+}
+if(x<8)return x;
+else return((e+1)<<3)|(cast_int(x)-8);
+}
+static int luaO_fb2int(int x){
+int e=(x>>3)&31;
+if(e==0)return x;
+else return((x&7)+8)<<(e-1);
+}
+static int luaO_log2(unsigned int x){
+static const lu_byte log_2[256]={
+0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+};
+int l=-1;
+while(x>=256){l+=8;x>>=8;}
+return l+log_2[x];
+}
+static int luaO_rawequalObj(const TValue*t1,const TValue*t2){
+if(ttype(t1)!=ttype(t2))return 0;
+else switch(ttype(t1)){
+case 0:
+return 1;
+case 3:
+return luai_numeq(nvalue(t1),nvalue(t2));
+case 1:
+return bvalue(t1)==bvalue(t2);
+case 2:
+return pvalue(t1)==pvalue(t2);
+default:
+return gcvalue(t1)==gcvalue(t2);
+}
+}
+static int luaO_str2d(const char*s,lua_Number*result){
+char*endptr;
+*result=lua_str2number(s,&endptr);
+if(endptr==s)return 0;
+if(*endptr=='x'||*endptr=='X')
+*result=cast_num(strtoul(s,&endptr,16));
+if(*endptr=='\0')return 1;
+while(isspace(cast(unsigned char,*endptr)))endptr++;
+if(*endptr!='\0')return 0;
+return 1;
+}
+static void pushstr(lua_State*L,const char*str){
+setsvalue(L,L->top,luaS_new(L,str));
+incr_top(L);
+}
+static const char*luaO_pushvfstring(lua_State*L,const char*fmt,va_list argp){
+int n=1;
+pushstr(L,"");
+for(;;){
+const char*e=strchr(fmt,'%');
+if(e==NULL)break;
+setsvalue(L,L->top,luaS_newlstr(L,fmt,e-fmt));
+incr_top(L);
+switch(*(e+1)){
+case's':{
+const char*s=va_arg(argp,char*);
+if(s==NULL)s="(null)";
+pushstr(L,s);
+break;
+}
+case'c':{
+char buff[2];
+buff[0]=cast(char,va_arg(argp,int));
+buff[1]='\0';
+pushstr(L,buff);
+break;
+}
+case'd':{
+setnvalue(L->top,cast_num(va_arg(argp,int)));
+incr_top(L);
+break;
+}
+case'f':{
+setnvalue(L->top,cast_num(va_arg(argp,l_uacNumber)));
+incr_top(L);
+break;
+}
+case'p':{
+char buff[4*sizeof(void*)+8];
+sprintf(buff,"%p",va_arg(argp,void*));
+pushstr(L,buff);
+break;
+}
+case'%':{
+pushstr(L,"%");
+break;
+}
+default:{
+char buff[3];
+buff[0]='%';
+buff[1]=*(e+1);
+buff[2]='\0';
+pushstr(L,buff);
+break;
+}
+}
+n+=2;
+fmt=e+2;
+}
+pushstr(L,fmt);
+luaV_concat(L,n+1,cast_int(L->top-L->base)-1);
+L->top-=n;
+return svalue(L->top-1);
+}
+static const char*luaO_pushfstring(lua_State*L,const char*fmt,...){
+const char*msg;
+va_list argp;
+va_start(argp,fmt);
+msg=luaO_pushvfstring(L,fmt,argp);
+va_end(argp);
+return msg;
+}
+static void luaO_chunkid(char*out,const char*source,size_t bufflen){
+if(*source=='='){
+strncpy(out,source+1,bufflen);
+out[bufflen-1]='\0';
+}
+else{
+if(*source=='@'){
+size_t l;
+source++;
+bufflen-=sizeof(" '...' ");
+l=strlen(source);
+strcpy(out,"");
+if(l>bufflen){
+source+=(l-bufflen);
+strcat(out,"...");
+}
+strcat(out,source);
+}
+else{
+size_t len=strcspn(source,"\n\r");
+bufflen-=sizeof(" [string \"...\"] ");
+if(len>bufflen)len=bufflen;
+strcpy(out,"[string \"");
+if(source[len]!='\0'){
+strncat(out,source,len);
+strcat(out,"...");
+}
+else
+strcat(out,source);
+strcat(out,"\"]");
+}
+}
+}
+#define gnode(t,i)(&(t)->node[i])
+#define gkey(n)(&(n)->i_key.nk)
+#define gval(n)(&(n)->i_val)
+#define gnext(n)((n)->i_key.nk.next)
+#define key2tval(n)(&(n)->i_key.tvk)
+static TValue*luaH_setnum(lua_State*L,Table*t,int key);
+static const TValue*luaH_getstr(Table*t,TString*key);
+static TValue*luaH_set(lua_State*L,Table*t,const TValue*key);
+static const char*const luaT_typenames[]={
+"nil","boolean","userdata","number",
+"string","table","function","userdata","thread",
+"proto","upval"
+};
+static void luaT_init(lua_State*L){
+static const char*const luaT_eventname[]={
+"__index","__newindex",
+"__gc","__mode","__eq",
+"__add","__sub","__mul","__div","__mod",
+"__pow","__unm","__len","__lt","__le",
+"__concat","__call"
+};
+int i;
+for(i=0;i<TM_N;i++){
+G(L)->tmname[i]=luaS_new(L,luaT_eventname[i]);
+luaS_fix(G(L)->tmname[i]);
+}
+}
+static const TValue*luaT_gettm(Table*events,TMS event,TString*ename){
+const TValue*tm=luaH_getstr(events,ename);
+if(ttisnil(tm)){
+events->flags|=cast_byte(1u<<event);
+return NULL;
+}
+else return tm;
+}
+static const TValue*luaT_gettmbyobj(lua_State*L,const TValue*o,TMS event){
+Table*mt;
+switch(ttype(o)){
+case 5:
+mt=hvalue(o)->metatable;
+break;
+case 7:
+mt=uvalue(o)->metatable;
+break;
+default:
+mt=G(L)->mt[ttype(o)];
+}
+return(mt?luaH_getstr(mt,G(L)->tmname[event]):(&luaO_nilobject_));
+}
+#define sizeCclosure(n)(cast(int,sizeof(CClosure))+cast(int,sizeof(TValue)*((n)-1)))
+#define sizeLclosure(n)(cast(int,sizeof(LClosure))+cast(int,sizeof(TValue*)*((n)-1)))
+static Closure*luaF_newCclosure(lua_State*L,int nelems,Table*e){
+Closure*c=cast(Closure*,luaM_malloc(L,sizeCclosure(nelems)));
+luaC_link(L,obj2gco(c),6);
+c->c.isC=1;
+c->c.env=e;
+c->c.nupvalues=cast_byte(nelems);
+return c;
+}
+static Closure*luaF_newLclosure(lua_State*L,int nelems,Table*e){
+Closure*c=cast(Closure*,luaM_malloc(L,sizeLclosure(nelems)));
+luaC_link(L,obj2gco(c),6);
+c->l.isC=0;
+c->l.env=e;
+c->l.nupvalues=cast_byte(nelems);
+while(nelems--)c->l.upvals[nelems]=NULL;
+return c;
+}
+static UpVal*luaF_newupval(lua_State*L){
+UpVal*uv=luaM_new(L,UpVal);
+luaC_link(L,obj2gco(uv),(8+2));
+uv->v=&uv->u.value;
+setnilvalue(uv->v);
+return uv;
+}
+static UpVal*luaF_findupval(lua_State*L,StkId level){
+global_State*g=G(L);
+GCObject**pp=&L->openupval;
+UpVal*p;
+UpVal*uv;
+while(*pp!=NULL&&(p=ngcotouv(*pp))->v>=level){
+if(p->v==level){
+if(isdead(g,obj2gco(p)))
+changewhite(obj2gco(p));
+return p;
+}
+pp=&p->next;
+}
+uv=luaM_new(L,UpVal);
+uv->tt=(8+2);
+uv->marked=luaC_white(g);
+uv->v=level;
+uv->next=*pp;
+*pp=obj2gco(uv);
+uv->u.l.prev=&g->uvhead;
+uv->u.l.next=g->uvhead.u.l.next;
+uv->u.l.next->u.l.prev=uv;
+g->uvhead.u.l.next=uv;
+return uv;
+}
+static void unlinkupval(UpVal*uv){
+uv->u.l.next->u.l.prev=uv->u.l.prev;
+uv->u.l.prev->u.l.next=uv->u.l.next;
+}
+static void luaF_freeupval(lua_State*L,UpVal*uv){
+if(uv->v!=&uv->u.value)
+unlinkupval(uv);
+luaM_free(L,uv);
+}
+static void luaF_close(lua_State*L,StkId level){
+UpVal*uv;
+global_State*g=G(L);
+while(L->openupval!=NULL&&(uv=ngcotouv(L->openupval))->v>=level){
+GCObject*o=obj2gco(uv);
+L->openupval=uv->next;
+if(isdead(g,o))
+luaF_freeupval(L,uv);
+else{
+unlinkupval(uv);
+setobj(L,&uv->u.value,uv->v);
+uv->v=&uv->u.value;
+luaC_linkupval(L,uv);
+}
+}
+}
+static Proto*luaF_newproto(lua_State*L){
+Proto*f=luaM_new(L,Proto);
+luaC_link(L,obj2gco(f),(8+1));
+f->k=NULL;
+f->sizek=0;
+f->p=NULL;
+f->sizep=0;
+f->code=NULL;
+f->sizecode=0;
+f->sizelineinfo=0;
+f->sizeupvalues=0;
+f->nups=0;
+f->upvalues=NULL;
+f->numparams=0;
+f->is_vararg=0;
+f->maxstacksize=0;
+f->lineinfo=NULL;
+f->sizelocvars=0;
+f->locvars=NULL;
+f->linedefined=0;
+f->lastlinedefined=0;
+f->source=NULL;
+return f;
+}
+static void luaF_freeproto(lua_State*L,Proto*f){
+luaM_freearray(L,f->code,f->sizecode,Instruction);
+luaM_freearray(L,f->p,f->sizep,Proto*);
+luaM_freearray(L,f->k,f->sizek,TValue);
+luaM_freearray(L,f->lineinfo,f->sizelineinfo,int);
+luaM_freearray(L,f->locvars,f->sizelocvars,struct LocVar);
+luaM_freearray(L,f->upvalues,f->sizeupvalues,TString*);
+luaM_free(L,f);
+}
+static void luaF_freeclosure(lua_State*L,Closure*c){
+int size=(c->c.isC)?sizeCclosure(c->c.nupvalues):
+sizeLclosure(c->l.nupvalues);
+luaM_freemem(L,c,size);
+}
+#define MASK1(n,p)((~((~(Instruction)0)<<n))<<p)
+#define MASK0(n,p)(~MASK1(n,p))
+#define GET_OPCODE(i)(cast(OpCode,((i)>>0)&MASK1(6,0)))
+#define SET_OPCODE(i,o)((i)=(((i)&MASK0(6,0))|((cast(Instruction,o)<<0)&MASK1(6,0))))
+#define GETARG_A(i)(cast(int,((i)>>(0+6))&MASK1(8,0)))
+#define SETARG_A(i,u)((i)=(((i)&MASK0(8,(0+6)))|((cast(Instruction,u)<<(0+6))&MASK1(8,(0+6)))))
+#define GETARG_B(i)(cast(int,((i)>>(((0+6)+8)+9))&MASK1(9,0)))
+#define SETARG_B(i,b)((i)=(((i)&MASK0(9,(((0+6)+8)+9)))|((cast(Instruction,b)<<(((0+6)+8)+9))&MASK1(9,(((0+6)+8)+9)))))
+#define GETARG_C(i)(cast(int,((i)>>((0+6)+8))&MASK1(9,0)))
+#define SETARG_C(i,b)((i)=(((i)&MASK0(9,((0+6)+8)))|((cast(Instruction,b)<<((0+6)+8))&MASK1(9,((0+6)+8)))))
+#define GETARG_Bx(i)(cast(int,((i)>>((0+6)+8))&MASK1((9+9),0)))
+#define SETARG_Bx(i,b)((i)=(((i)&MASK0((9+9),((0+6)+8)))|((cast(Instruction,b)<<((0+6)+8))&MASK1((9+9),((0+6)+8)))))
+#define GETARG_sBx(i)(GETARG_Bx(i)-(((1<<(9+9))-1)>>1))
+#define SETARG_sBx(i,b)SETARG_Bx((i),cast(unsigned int,(b)+(((1<<(9+9))-1)>>1)))
+#define CREATE_ABC(o,a,b,c)((cast(Instruction,o)<<0)|(cast(Instruction,a)<<(0+6))|(cast(Instruction,b)<<(((0+6)+8)+9))|(cast(Instruction,c)<<((0+6)+8)))
+#define CREATE_ABx(o,a,bc)((cast(Instruction,o)<<0)|(cast(Instruction,a)<<(0+6))|(cast(Instruction,bc)<<((0+6)+8)))
+#define ISK(x)((x)&(1<<(9-1)))
+#define INDEXK(r)((int)(r)&~(1<<(9-1)))
+#define RKASK(x)((x)|(1<<(9-1)))
+static const lu_byte luaP_opmodes[(cast(int,OP_VARARG)+1)];
+#define getBMode(m)(cast(enum OpArgMask,(luaP_opmodes[m]>>4)&3))
+#define getCMode(m)(cast(enum OpArgMask,(luaP_opmodes[m]>>2)&3))
+#define testTMode(m)(luaP_opmodes[m]&(1<<7))
+typedef struct expdesc{
+expkind k;
+union{
+struct{int info,aux;}s;
+lua_Number nval;
+}u;
+int t;
+int f;
+}expdesc;
+typedef struct upvaldesc{
+lu_byte k;
+lu_byte info;
+}upvaldesc;
+struct BlockCnt;
+typedef struct FuncState{
+Proto*f;
+Table*h;
+struct FuncState*prev;
+struct LexState*ls;
+struct lua_State*L;
+struct BlockCnt*bl;
+int pc;
+int lasttarget;
+int jpc;
+int freereg;
+int nk;
+int np;
+short nlocvars;
+lu_byte nactvar;
+upvaldesc upvalues[60];
+unsigned short actvar[200];
+}FuncState;
+static Proto*luaY_parser(lua_State*L,ZIO*z,Mbuffer*buff,
+const char*name);
+struct lua_longjmp{
+struct lua_longjmp*previous;
+jmp_buf b;
+volatile int status;
+};
+static void luaD_seterrorobj(lua_State*L,int errcode,StkId oldtop){
+switch(errcode){
+case 4:{
+setsvalue(L,oldtop,luaS_newliteral(L,"not enough memory"));
+break;
+}
+case 5:{
+setsvalue(L,oldtop,luaS_newliteral(L,"error in error handling"));
+break;
+}
+case 3:
+case 2:{
+setobj(L,oldtop,L->top-1);
+break;
+}
+}
+L->top=oldtop+1;
+}
+static void restore_stack_limit(lua_State*L){
+if(L->size_ci>20000){
+int inuse=cast_int(L->ci-L->base_ci);
+if(inuse+1<20000)
+luaD_reallocCI(L,20000);
+}
+}
+static void resetstack(lua_State*L,int status){
+L->ci=L->base_ci;
+L->base=L->ci->base;
+luaF_close(L,L->base);
+luaD_seterrorobj(L,status,L->base);
+L->nCcalls=L->baseCcalls;
+L->allowhook=1;
+restore_stack_limit(L);
+L->errfunc=0;
+L->errorJmp=NULL;
+}
+static void luaD_throw(lua_State*L,int errcode){
+if(L->errorJmp){
+L->errorJmp->status=errcode;
+LUAI_THROW(L,L->errorJmp);
+}
+else{
+L->status=cast_byte(errcode);
+if(G(L)->panic){
+resetstack(L,errcode);
+G(L)->panic(L);
+}
+exit(EXIT_FAILURE);
+}
+}
+static int luaD_rawrunprotected(lua_State*L,Pfunc f,void*ud){
+struct lua_longjmp lj;
+lj.status=0;
+lj.previous=L->errorJmp;
+L->errorJmp=&lj;
+LUAI_TRY(L,&lj,
+(*f)(L,ud);
+);
+L->errorJmp=lj.previous;
+return lj.status;
+}
+static void correctstack(lua_State*L,TValue*oldstack){
+CallInfo*ci;
+GCObject*up;
+L->top=(L->top-oldstack)+L->stack;
+for(up=L->openupval;up!=NULL;up=up->gch.next)
+gco2uv(up)->v=(gco2uv(up)->v-oldstack)+L->stack;
+for(ci=L->base_ci;ci<=L->ci;ci++){
+ci->top=(ci->top-oldstack)+L->stack;
+ci->base=(ci->base-oldstack)+L->stack;
+ci->func=(ci->func-oldstack)+L->stack;
+}
+L->base=(L->base-oldstack)+L->stack;
+}
+static void luaD_reallocstack(lua_State*L,int newsize){
+TValue*oldstack=L->stack;
+int realsize=newsize+1+5;
+luaM_reallocvector(L,L->stack,L->stacksize,realsize,TValue);
+L->stacksize=realsize;
+L->stack_last=L->stack+newsize;
+correctstack(L,oldstack);
+}
+static void luaD_reallocCI(lua_State*L,int newsize){
+CallInfo*oldci=L->base_ci;
+luaM_reallocvector(L,L->base_ci,L->size_ci,newsize,CallInfo);
+L->size_ci=newsize;
+L->ci=(L->ci-oldci)+L->base_ci;
+L->end_ci=L->base_ci+L->size_ci-1;
+}
+static void luaD_growstack(lua_State*L,int n){
+if(n<=L->stacksize)
+luaD_reallocstack(L,2*L->stacksize);
+else
+luaD_reallocstack(L,L->stacksize+n);
+}
+static CallInfo*growCI(lua_State*L){
+if(L->size_ci>20000)
+luaD_throw(L,5);
+else{
+luaD_reallocCI(L,2*L->size_ci);
+if(L->size_ci>20000)
+luaG_runerror(L,"stack overflow");
+}
+return++L->ci;
+}
+static StkId adjust_varargs(lua_State*L,Proto*p,int actual){
+int i;
+int nfixargs=p->numparams;
+Table*htab=NULL;
+StkId base,fixed;
+for(;actual<nfixargs;++actual)
+setnilvalue(L->top++);
+fixed=L->top-actual;
+base=L->top;
+for(i=0;i<nfixargs;i++){
+setobj(L,L->top++,fixed+i);
+setnilvalue(fixed+i);
+}
+if(htab){
+sethvalue(L,L->top++,htab);
+}
+return base;
+}
+static StkId tryfuncTM(lua_State*L,StkId func){
+const TValue*tm=luaT_gettmbyobj(L,func,TM_CALL);
+StkId p;
+ptrdiff_t funcr=savestack(L,func);
+if(!ttisfunction(tm))
+luaG_typeerror(L,func,"call");
+for(p=L->top;p>func;p--)setobj(L,p,p-1);
+incr_top(L);
+func=restorestack(L,funcr);
+setobj(L,func,tm);
+return func;
+}
+#define inc_ci(L)((L->ci==L->end_ci)?growCI(L):(condhardstacktests(luaD_reallocCI(L,L->size_ci)),++L->ci))
+static int luaD_precall(lua_State*L,StkId func,int nresults){
+LClosure*cl;
+ptrdiff_t funcr;
+if(!ttisfunction(func))
+func=tryfuncTM(L,func);
+funcr=savestack(L,func);
+cl=&clvalue(func)->l;
+L->ci->savedpc=L->savedpc;
+if(!cl->isC){
+CallInfo*ci;
+StkId st,base;
+Proto*p=cl->p;
+luaD_checkstack(L,p->maxstacksize);
+func=restorestack(L,funcr);
+if(!p->is_vararg){
+base=func+1;
+if(L->top>base+p->numparams)
+L->top=base+p->numparams;
+}
+else{
+int nargs=cast_int(L->top-func)-1;
+base=adjust_varargs(L,p,nargs);
+func=restorestack(L,funcr);
+}
+ci=inc_ci(L);
+ci->func=func;
+L->base=ci->base=base;
+ci->top=L->base+p->maxstacksize;
+L->savedpc=p->code;
+ci->tailcalls=0;
+ci->nresults=nresults;
+for(st=L->top;st<ci->top;st++)
+setnilvalue(st);
+L->top=ci->top;
+return 0;
+}
+else{
+CallInfo*ci;
+int n;
+luaD_checkstack(L,20);
+ci=inc_ci(L);
+ci->func=restorestack(L,funcr);
+L->base=ci->base=ci->func+1;
+ci->top=L->top+20;
+ci->nresults=nresults;
+n=(*curr_func(L)->c.f)(L);
+if(n<0)
+return 2;
+else{
+luaD_poscall(L,L->top-n);
+return 1;
+}
+}
+}
+static int luaD_poscall(lua_State*L,StkId firstResult){
+StkId res;
+int wanted,i;
+CallInfo*ci;
+ci=L->ci--;
+res=ci->func;
+wanted=ci->nresults;
+L->base=(ci-1)->base;
+L->savedpc=(ci-1)->savedpc;
+for(i=wanted;i!=0&&firstResult<L->top;i--)
+setobj(L,res++,firstResult++);
+while(i-->0)
+setnilvalue(res++);
+L->top=res;
+return(wanted-(-1));
+}
+static void luaD_call(lua_State*L,StkId func,int nResults){
+if(++L->nCcalls>=200){
+if(L->nCcalls==200)
+luaG_runerror(L,"C stack overflow");
+else if(L->nCcalls>=(200+(200>>3)))
+luaD_throw(L,5);
+}
+if(luaD_precall(L,func,nResults)==0)
+luaV_execute(L,1);
+L->nCcalls--;
+luaC_checkGC(L);
+}
+static int luaD_pcall(lua_State*L,Pfunc func,void*u,
+ptrdiff_t old_top,ptrdiff_t ef){
+int status;
+unsigned short oldnCcalls=L->nCcalls;
+ptrdiff_t old_ci=saveci(L,L->ci);
+lu_byte old_allowhooks=L->allowhook;
+ptrdiff_t old_errfunc=L->errfunc;
+L->errfunc=ef;
+status=luaD_rawrunprotected(L,func,u);
+if(status!=0){
+StkId oldtop=restorestack(L,old_top);
+luaF_close(L,oldtop);
+luaD_seterrorobj(L,status,oldtop);
+L->nCcalls=oldnCcalls;
+L->ci=restoreci(L,old_ci);
+L->base=L->ci->base;
+L->savedpc=L->ci->savedpc;
+L->allowhook=old_allowhooks;
+restore_stack_limit(L);
+}
+L->errfunc=old_errfunc;
+return status;
+}
+struct SParser{
+ZIO*z;
+Mbuffer buff;
+const char*name;
+};
+static void f_parser(lua_State*L,void*ud){
+int i;
+Proto*tf;
+Closure*cl;
+struct SParser*p=cast(struct SParser*,ud);
+luaC_checkGC(L);
+tf=luaY_parser(L,p->z,
+&p->buff,p->name);
+cl=luaF_newLclosure(L,tf->nups,hvalue(gt(L)));
+cl->l.p=tf;
+for(i=0;i<tf->nups;i++)
+cl->l.upvals[i]=luaF_newupval(L);
+setclvalue(L,L->top,cl);
+incr_top(L);
+}
+static int luaD_protectedparser(lua_State*L,ZIO*z,const char*name){
+struct SParser p;
+int status;
+p.z=z;p.name=name;
+luaZ_initbuffer(L,&p.buff);
+status=luaD_pcall(L,f_parser,&p,savestack(L,L->top),L->errfunc);
+luaZ_freebuffer(L,&p.buff);
+return status;
+}
+static void luaS_resize(lua_State*L,int newsize){
+GCObject**newhash;
+stringtable*tb;
+int i;
+if(G(L)->gcstate==2)
+return;
+newhash=luaM_newvector(L,newsize,GCObject*);
+tb=&G(L)->strt;
+for(i=0;i<newsize;i++)newhash[i]=NULL;
+for(i=0;i<tb->size;i++){
+GCObject*p=tb->hash[i];
+while(p){
+GCObject*next=p->gch.next;
+unsigned int h=gco2ts(p)->hash;
+int h1=lmod(h,newsize);
+p->gch.next=newhash[h1];
+newhash[h1]=p;
+p=next;
+}
+}
+luaM_freearray(L,tb->hash,tb->size,TString*);
+tb->size=newsize;
+tb->hash=newhash;
+}
+static TString*newlstr(lua_State*L,const char*str,size_t l,
+unsigned int h){
+TString*ts;
+stringtable*tb;
+if(l+1>(((size_t)(~(size_t)0)-2)-sizeof(TString))/sizeof(char))
+luaM_toobig(L);
+ts=cast(TString*,luaM_malloc(L,(l+1)*sizeof(char)+sizeof(TString)));
+ts->tsv.len=l;
+ts->tsv.hash=h;
+ts->tsv.marked=luaC_white(G(L));
+ts->tsv.tt=4;
+ts->tsv.reserved=0;
+memcpy(ts+1,str,l*sizeof(char));
+((char*)(ts+1))[l]='\0';
+tb=&G(L)->strt;
+h=lmod(h,tb->size);
+ts->tsv.next=tb->hash[h];
+tb->hash[h]=obj2gco(ts);
+tb->nuse++;
+if(tb->nuse>cast(lu_int32,tb->size)&&tb->size<=(INT_MAX-2)/2)
+luaS_resize(L,tb->size*2);
+return ts;
+}
+static TString*luaS_newlstr(lua_State*L,const char*str,size_t l){
+GCObject*o;
+unsigned int h=cast(unsigned int,l);
+size_t step=(l>>5)+1;
+size_t l1;
+for(l1=l;l1>=step;l1-=step)
+h=h^((h<<5)+(h>>2)+cast(unsigned char,str[l1-1]));
+for(o=G(L)->strt.hash[lmod(h,G(L)->strt.size)];
+o!=NULL;
+o=o->gch.next){
+TString*ts=rawgco2ts(o);
+if(ts->tsv.len==l&&(memcmp(str,getstr(ts),l)==0)){
+if(isdead(G(L),o))changewhite(o);
+return ts;
+}
+}
+return newlstr(L,str,l,h);
+}
+static Udata*luaS_newudata(lua_State*L,size_t s,Table*e){
+Udata*u;
+if(s>((size_t)(~(size_t)0)-2)-sizeof(Udata))
+luaM_toobig(L);
+u=cast(Udata*,luaM_malloc(L,s+sizeof(Udata)));
+u->uv.marked=luaC_white(G(L));
+u->uv.tt=7;
+u->uv.len=s;
+u->uv.metatable=NULL;
+u->uv.env=e;
+u->uv.next=G(L)->mainthread->next;
+G(L)->mainthread->next=obj2gco(u);
+return u;
+}
+#define hashpow2(t,n)(gnode(t,lmod((n),sizenode(t))))
+#define hashstr(t,str)hashpow2(t,(str)->tsv.hash)
+#define hashboolean(t,p)hashpow2(t,p)
+#define hashmod(t,n)(gnode(t,((n)%((sizenode(t)-1)|1))))
+#define hashpointer(t,p)hashmod(t,IntPoint(p))
+static const Node dummynode_={
+{{NULL},0},
+{{{NULL},0,NULL}}
+};
+static Node*hashnum(const Table*t,lua_Number n){
+unsigned int a[cast_int(sizeof(lua_Number)/sizeof(int))];
+int i;
+if(luai_numeq(n,0))
+return gnode(t,0);
+memcpy(a,&n,sizeof(a));
+for(i=1;i<cast_int(sizeof(lua_Number)/sizeof(int));i++)a[0]+=a[i];
+return hashmod(t,a[0]);
+}
+static Node*mainposition(const Table*t,const TValue*key){
+switch(ttype(key)){
+case 3:
+return hashnum(t,nvalue(key));
+case 4:
+return hashstr(t,rawtsvalue(key));
+case 1:
+return hashboolean(t,bvalue(key));
+case 2:
+return hashpointer(t,pvalue(key));
+default:
+return hashpointer(t,gcvalue(key));
+}
+}
+static int arrayindex(const TValue*key){
+if(ttisnumber(key)){
+lua_Number n=nvalue(key);
+int k;
+lua_number2int(k,n);
+if(luai_numeq(cast_num(k),n))
+return k;
+}
+return-1;
+}
+static int findindex(lua_State*L,Table*t,StkId key){
+int i;
+if(ttisnil(key))return-1;
+i=arrayindex(key);
+if(0<i&&i<=t->sizearray)
+return i-1;
+else{
+Node*n=mainposition(t,key);
+do{
+if(luaO_rawequalObj(key2tval(n),key)||
+(ttype(gkey(n))==(8+3)&&iscollectable(key)&&
+gcvalue(gkey(n))==gcvalue(key))){
+i=cast_int(n-gnode(t,0));
+return i+t->sizearray;
+}
+else n=gnext(n);
+}while(n);
+luaG_runerror(L,"invalid key to "LUA_QL("next"));
+return 0;
+}
+}
+static int luaH_next(lua_State*L,Table*t,StkId key){
+int i=findindex(L,t,key);
+for(i++;i<t->sizearray;i++){
+if(!ttisnil(&t->array[i])){
+setnvalue(key,cast_num(i+1));
+setobj(L,key+1,&t->array[i]);
+return 1;
+}
+}
+for(i-=t->sizearray;i<(int)sizenode(t);i++){
+if(!ttisnil(gval(gnode(t,i)))){
+setobj(L,key,key2tval(gnode(t,i)));
+setobj(L,key+1,gval(gnode(t,i)));
+return 1;
+}
+}
+return 0;
+}
+static int computesizes(int nums[],int*narray){
+int i;
+int twotoi;
+int a=0;
+int na=0;
+int n=0;
+for(i=0,twotoi=1;twotoi/2<*narray;i++,twotoi*=2){
+if(nums[i]>0){
+a+=nums[i];
+if(a>twotoi/2){
+n=twotoi;
+na=a;
+}
+}
+if(a==*narray)break;
+}
+*narray=n;
+return na;
+}
+static int countint(const TValue*key,int*nums){
+int k=arrayindex(key);
+if(0<k&&k<=(1<<(32-2))){
+nums[ceillog2(k)]++;
+return 1;
+}
+else
+return 0;
+}
+static int numusearray(const Table*t,int*nums){
+int lg;
+int ttlg;
+int ause=0;
+int i=1;
+for(lg=0,ttlg=1;lg<=(32-2);lg++,ttlg*=2){
+int lc=0;
+int lim=ttlg;
+if(lim>t->sizearray){
+lim=t->sizearray;
+if(i>lim)
+break;
+}
+for(;i<=lim;i++){
+if(!ttisnil(&t->array[i-1]))
+lc++;
+}
+nums[lg]+=lc;
+ause+=lc;
+}
+return ause;
+}
+static int numusehash(const Table*t,int*nums,int*pnasize){
+int totaluse=0;
+int ause=0;
+int i=sizenode(t);
+while(i--){
+Node*n=&t->node[i];
+if(!ttisnil(gval(n))){
+ause+=countint(key2tval(n),nums);
+totaluse++;
+}
+}
+*pnasize+=ause;
+return totaluse;
+}
+static void setarrayvector(lua_State*L,Table*t,int size){
+int i;
+luaM_reallocvector(L,t->array,t->sizearray,size,TValue);
+for(i=t->sizearray;i<size;i++)
+setnilvalue(&t->array[i]);
+t->sizearray=size;
+}
+static void setnodevector(lua_State*L,Table*t,int size){
+int lsize;
+if(size==0){
+t->node=cast(Node*,(&dummynode_));
+lsize=0;
+}
+else{
+int i;
+lsize=ceillog2(size);
+if(lsize>(32-2))
+luaG_runerror(L,"table overflow");
+size=twoto(lsize);
+t->node=luaM_newvector(L,size,Node);
+for(i=0;i<size;i++){
+Node*n=gnode(t,i);
+gnext(n)=NULL;
+setnilvalue(gkey(n));
+setnilvalue(gval(n));
+}
+}
+t->lsizenode=cast_byte(lsize);
+t->lastfree=gnode(t,size);
+}
+static void resize(lua_State*L,Table*t,int nasize,int nhsize){
+int i;
+int oldasize=t->sizearray;
+int oldhsize=t->lsizenode;
+Node*nold=t->node;
+if(nasize>oldasize)
+setarrayvector(L,t,nasize);
+setnodevector(L,t,nhsize);
+if(nasize<oldasize){
+t->sizearray=nasize;
+for(i=nasize;i<oldasize;i++){
+if(!ttisnil(&t->array[i]))
+setobj(L,luaH_setnum(L,t,i+1),&t->array[i]);
+}
+luaM_reallocvector(L,t->array,oldasize,nasize,TValue);
+}
+for(i=twoto(oldhsize)-1;i>=0;i--){
+Node*old=nold+i;
+if(!ttisnil(gval(old)))
+setobj(L,luaH_set(L,t,key2tval(old)),gval(old));
+}
+if(nold!=(&dummynode_))
+luaM_freearray(L,nold,twoto(oldhsize),Node);
+}
+static void luaH_resizearray(lua_State*L,Table*t,int nasize){
+int nsize=(t->node==(&dummynode_))?0:sizenode(t);
+resize(L,t,nasize,nsize);
+}
+static void rehash(lua_State*L,Table*t,const TValue*ek){
+int nasize,na;
+int nums[(32-2)+1];
+int i;
+int totaluse;
+for(i=0;i<=(32-2);i++)nums[i]=0;
+nasize=numusearray(t,nums);
+totaluse=nasize;
+totaluse+=numusehash(t,nums,&nasize);
+nasize+=countint(ek,nums);
+totaluse++;
+na=computesizes(nums,&nasize);
+resize(L,t,nasize,totaluse-na);
+}
+static Table*luaH_new(lua_State*L,int narray,int nhash){
+Table*t=luaM_new(L,Table);
+luaC_link(L,obj2gco(t),5);
+t->metatable=NULL;
+t->flags=cast_byte(~0);
+t->array=NULL;
+t->sizearray=0;
+t->lsizenode=0;
+t->node=cast(Node*,(&dummynode_));
+setarrayvector(L,t,narray);
+setnodevector(L,t,nhash);
+return t;
+}
+static void luaH_free(lua_State*L,Table*t){
+if(t->node!=(&dummynode_))
+luaM_freearray(L,t->node,sizenode(t),Node);
+luaM_freearray(L,t->array,t->sizearray,TValue);
+luaM_free(L,t);
+}
+static Node*getfreepos(Table*t){
+while(t->lastfree-->t->node){
+if(ttisnil(gkey(t->lastfree)))
+return t->lastfree;
+}
+return NULL;
+}
+static TValue*newkey(lua_State*L,Table*t,const TValue*key){
+Node*mp=mainposition(t,key);
+if(!ttisnil(gval(mp))||mp==(&dummynode_)){
+Node*othern;
+Node*n=getfreepos(t);
+if(n==NULL){
+rehash(L,t,key);
+return luaH_set(L,t,key);
+}
+othern=mainposition(t,key2tval(mp));
+if(othern!=mp){
+while(gnext(othern)!=mp)othern=gnext(othern);
+gnext(othern)=n;
+*n=*mp;
+gnext(mp)=NULL;
+setnilvalue(gval(mp));
+}
+else{
+gnext(n)=gnext(mp);
+gnext(mp)=n;
+mp=n;
+}
+}
+gkey(mp)->value=key->value;gkey(mp)->tt=key->tt;
+luaC_barriert(L,t,key);
+return gval(mp);
+}
+static const TValue*luaH_getnum(Table*t,int key){
+if(cast(unsigned int,key-1)<cast(unsigned int,t->sizearray))
+return&t->array[key-1];
+else{
+lua_Number nk=cast_num(key);
+Node*n=hashnum(t,nk);
+do{
+if(ttisnumber(gkey(n))&&luai_numeq(nvalue(gkey(n)),nk))
+return gval(n);
+else n=gnext(n);
+}while(n);
+return(&luaO_nilobject_);
+}
+}
+static const TValue*luaH_getstr(Table*t,TString*key){
+Node*n=hashstr(t,key);
+do{
+if(ttisstring(gkey(n))&&rawtsvalue(gkey(n))==key)
+return gval(n);
+else n=gnext(n);
+}while(n);
+return(&luaO_nilobject_);
+}
+static const TValue*luaH_get(Table*t,const TValue*key){
+switch(ttype(key)){
+case 0:return(&luaO_nilobject_);
+case 4:return luaH_getstr(t,rawtsvalue(key));
+case 3:{
+int k;
+lua_Number n=nvalue(key);
+lua_number2int(k,n);
+if(luai_numeq(cast_num(k),nvalue(key)))
+return luaH_getnum(t,k);
+}
+default:{
+Node*n=mainposition(t,key);
+do{
+if(luaO_rawequalObj(key2tval(n),key))
+return gval(n);
+else n=gnext(n);
+}while(n);
+return(&luaO_nilobject_);
+}
+}
+}
+static TValue*luaH_set(lua_State*L,Table*t,const TValue*key){
+const TValue*p=luaH_get(t,key);
+t->flags=0;
+if(p!=(&luaO_nilobject_))
+return cast(TValue*,p);
+else{
+if(ttisnil(key))luaG_runerror(L,"table index is nil");
+else if(ttisnumber(key)&&luai_numisnan(nvalue(key)))
+luaG_runerror(L,"table index is NaN");
+return newkey(L,t,key);
+}
+}
+static TValue*luaH_setnum(lua_State*L,Table*t,int key){
+const TValue*p=luaH_getnum(t,key);
+if(p!=(&luaO_nilobject_))
+return cast(TValue*,p);
+else{
+TValue k;
+setnvalue(&k,cast_num(key));
+return newkey(L,t,&k);
+}
+}
+static TValue*luaH_setstr(lua_State*L,Table*t,TString*key){
+const TValue*p=luaH_getstr(t,key);
+if(p!=(&luaO_nilobject_))
+return cast(TValue*,p);
+else{
+TValue k;
+setsvalue(L,&k,key);
+return newkey(L,t,&k);
+}
+}
+static int unbound_search(Table*t,unsigned int j){
+unsigned int i=j;
+j++;
+while(!ttisnil(luaH_getnum(t,j))){
+i=j;
+j*=2;
+if(j>cast(unsigned int,(INT_MAX-2))){
+i=1;
+while(!ttisnil(luaH_getnum(t,i)))i++;
+return i-1;
+}
+}
+while(j-i>1){
+unsigned int m=(i+j)/2;
+if(ttisnil(luaH_getnum(t,m)))j=m;
+else i=m;
+}
+return i;
+}
+static int luaH_getn(Table*t){
+unsigned int j=t->sizearray;
+if(j>0&&ttisnil(&t->array[j-1])){
+unsigned int i=0;
+while(j-i>1){
+unsigned int m=(i+j)/2;
+if(ttisnil(&t->array[m-1]))j=m;
+else i=m;
+}
+return i;
+}
+else if(t->node==(&dummynode_))
+return j;
+else return unbound_search(t,j);
+}
+#define makewhite(g,x)((x)->gch.marked=cast_byte(((x)->gch.marked&cast_byte(~(bitmask(2)|bit2mask(0,1))))|luaC_white(g)))
+#define white2gray(x)reset2bits((x)->gch.marked,0,1)
+#define black2gray(x)resetbit((x)->gch.marked,2)
+#define stringmark(s)reset2bits((s)->tsv.marked,0,1)
+#define isfinalized(u)testbit((u)->marked,3)
+#define markfinalized(u)l_setbit((u)->marked,3)
+#define markvalue(g,o){checkconsistency(o);if(iscollectable(o)&&iswhite(gcvalue(o)))reallymarkobject(g,gcvalue(o));}
+#define markobject(g,t){if(iswhite(obj2gco(t)))reallymarkobject(g,obj2gco(t));}
+#define setthreshold(g)(g->GCthreshold=(g->estimate/100)*g->gcpause)
+static void removeentry(Node*n){
+if(iscollectable(gkey(n)))
+setttype(gkey(n),(8+3));
+}
+static void reallymarkobject(global_State*g,GCObject*o){
+white2gray(o);
+switch(o->gch.tt){
+case 4:{
+return;
+}
+case 7:{
+Table*mt=gco2u(o)->metatable;
+gray2black(o);
+if(mt)markobject(g,mt);
+markobject(g,gco2u(o)->env);
+return;
+}
+case(8+2):{
+UpVal*uv=gco2uv(o);
+markvalue(g,uv->v);
+if(uv->v==&uv->u.value)
+gray2black(o);
+return;
+}
+case 6:{
+gco2cl(o)->c.gclist=g->gray;
+g->gray=o;
+break;
+}
+case 5:{
+gco2h(o)->gclist=g->gray;
+g->gray=o;
+break;
+}
+case 8:{
+gco2th(o)->gclist=g->gray;
+g->gray=o;
+break;
+}
+case(8+1):{
+gco2p(o)->gclist=g->gray;
+g->gray=o;
+break;
+}
+default:;
+}
+}
+static void marktmu(global_State*g){
+GCObject*u=g->tmudata;
+if(u){
+do{
+u=u->gch.next;
+makewhite(g,u);
+reallymarkobject(g,u);
+}while(u!=g->tmudata);
+}
+}
+static size_t luaC_separateudata(lua_State*L,int all){
+global_State*g=G(L);
+size_t deadmem=0;
+GCObject**p=&g->mainthread->next;
+GCObject*curr;
+while((curr=*p)!=NULL){
+if(!(iswhite(curr)||all)||isfinalized(gco2u(curr)))
+p=&curr->gch.next;
+else if(fasttm(L,gco2u(curr)->metatable,TM_GC)==NULL){
+markfinalized(gco2u(curr));
+p=&curr->gch.next;
+}
+else{
+deadmem+=sizeudata(gco2u(curr));
+markfinalized(gco2u(curr));
+*p=curr->gch.next;
+if(g->tmudata==NULL)
+g->tmudata=curr->gch.next=curr;
+else{
+curr->gch.next=g->tmudata->gch.next;
+g->tmudata->gch.next=curr;
+g->tmudata=curr;
+}
+}
+}
+return deadmem;
+}
+static int traversetable(global_State*g,Table*h){
+int i;
+int weakkey=0;
+int weakvalue=0;
+const TValue*mode;
+if(h->metatable)
+markobject(g,h->metatable);
+mode=gfasttm(g,h->metatable,TM_MODE);
+if(mode&&ttisstring(mode)){
+weakkey=(strchr(svalue(mode),'k')!=NULL);
+weakvalue=(strchr(svalue(mode),'v')!=NULL);
+if(weakkey||weakvalue){
+h->marked&=~(bitmask(3)|bitmask(4));
+h->marked|=cast_byte((weakkey<<3)|
+(weakvalue<<4));
+h->gclist=g->weak;
+g->weak=obj2gco(h);
+}
+}
+if(weakkey&&weakvalue)return 1;
+if(!weakvalue){
+i=h->sizearray;
+while(i--)
+markvalue(g,&h->array[i]);
+}
+i=sizenode(h);
+while(i--){
+Node*n=gnode(h,i);
+if(ttisnil(gval(n)))
+removeentry(n);
+else{
+if(!weakkey)markvalue(g,gkey(n));
+if(!weakvalue)markvalue(g,gval(n));
+}
+}
+return weakkey||weakvalue;
+}
+static void traverseproto(global_State*g,Proto*f){
+int i;
+if(f->source)stringmark(f->source);
+for(i=0;i<f->sizek;i++)
+markvalue(g,&f->k[i]);
+for(i=0;i<f->sizeupvalues;i++){
+if(f->upvalues[i])
+stringmark(f->upvalues[i]);
+}
+for(i=0;i<f->sizep;i++){
+if(f->p[i])
+markobject(g,f->p[i]);
+}
+for(i=0;i<f->sizelocvars;i++){
+if(f->locvars[i].varname)
+stringmark(f->locvars[i].varname);
+}
+}
+static void traverseclosure(global_State*g,Closure*cl){
+markobject(g,cl->c.env);
+if(cl->c.isC){
+int i;
+for(i=0;i<cl->c.nupvalues;i++)
+markvalue(g,&cl->c.upvalue[i]);
+}
+else{
+int i;
+markobject(g,cl->l.p);
+for(i=0;i<cl->l.nupvalues;i++)
+markobject(g,cl->l.upvals[i]);
+}
+}
+static void checkstacksizes(lua_State*L,StkId max){
+int ci_used=cast_int(L->ci-L->base_ci);
+int s_used=cast_int(max-L->stack);
+if(L->size_ci>20000)
+return;
+if(4*ci_used<L->size_ci&&2*8<L->size_ci)
+luaD_reallocCI(L,L->size_ci/2);
+condhardstacktests(luaD_reallocCI(L,ci_used+1));
+if(4*s_used<L->stacksize&&
+2*((2*20)+5)<L->stacksize)
+luaD_reallocstack(L,L->stacksize/2);
+condhardstacktests(luaD_reallocstack(L,s_used));
+}
+static void traversestack(global_State*g,lua_State*l){
+StkId o,lim;
+CallInfo*ci;
+markvalue(g,gt(l));
+lim=l->top;
+for(ci=l->base_ci;ci<=l->ci;ci++){
+if(lim<ci->top)lim=ci->top;
+}
+for(o=l->stack;o<l->top;o++)
+markvalue(g,o);
+for(;o<=lim;o++)
+setnilvalue(o);
+checkstacksizes(l,lim);
+}
+static l_mem propagatemark(global_State*g){
+GCObject*o=g->gray;
+gray2black(o);
+switch(o->gch.tt){
+case 5:{
+Table*h=gco2h(o);
+g->gray=h->gclist;
+if(traversetable(g,h))
+black2gray(o);
+return sizeof(Table)+sizeof(TValue)*h->sizearray+
+sizeof(Node)*sizenode(h);
+}
+case 6:{
+Closure*cl=gco2cl(o);
+g->gray=cl->c.gclist;
+traverseclosure(g,cl);
+return(cl->c.isC)?sizeCclosure(cl->c.nupvalues):
+sizeLclosure(cl->l.nupvalues);
+}
+case 8:{
+lua_State*th=gco2th(o);
+g->gray=th->gclist;
+th->gclist=g->grayagain;
+g->grayagain=o;
+black2gray(o);
+traversestack(g,th);
+return sizeof(lua_State)+sizeof(TValue)*th->stacksize+
+sizeof(CallInfo)*th->size_ci;
+}
+case(8+1):{
+Proto*p=gco2p(o);
+g->gray=p->gclist;
+traverseproto(g,p);
+return sizeof(Proto)+sizeof(Instruction)*p->sizecode+
+sizeof(Proto*)*p->sizep+
+sizeof(TValue)*p->sizek+
+sizeof(int)*p->sizelineinfo+
+sizeof(LocVar)*p->sizelocvars+
+sizeof(TString*)*p->sizeupvalues;
+}
+default:return 0;
+}
+}
+static size_t propagateall(global_State*g){
+size_t m=0;
+while(g->gray)m+=propagatemark(g);
+return m;
+}
+static int iscleared(const TValue*o,int iskey){
+if(!iscollectable(o))return 0;
+if(ttisstring(o)){
+stringmark(rawtsvalue(o));
+return 0;
+}
+return iswhite(gcvalue(o))||
+(ttisuserdata(o)&&(!iskey&&isfinalized(uvalue(o))));
+}
+static void cleartable(GCObject*l){
+while(l){
+Table*h=gco2h(l);
+int i=h->sizearray;
+if(testbit(h->marked,4)){
+while(i--){
+TValue*o=&h->array[i];
+if(iscleared(o,0))
+setnilvalue(o);
+}
+}
+i=sizenode(h);
+while(i--){
+Node*n=gnode(h,i);
+if(!ttisnil(gval(n))&&
+(iscleared(key2tval(n),1)||iscleared(gval(n),0))){
+setnilvalue(gval(n));
+removeentry(n);
+}
+}
+l=h->gclist;
+}
+}
+static void freeobj(lua_State*L,GCObject*o){
+switch(o->gch.tt){
+case(8+1):luaF_freeproto(L,gco2p(o));break;
+case 6:luaF_freeclosure(L,gco2cl(o));break;
+case(8+2):luaF_freeupval(L,gco2uv(o));break;
+case 5:luaH_free(L,gco2h(o));break;
+case 8:{
+luaE_freethread(L,gco2th(o));
+break;
+}
+case 4:{
+G(L)->strt.nuse--;
+luaM_freemem(L,o,sizestring(gco2ts(o)));
+break;
+}
+case 7:{
+luaM_freemem(L,o,sizeudata(gco2u(o)));
+break;
+}
+default:;
+}
+}
+#define sweepwholelist(L,p)sweeplist(L,p,((lu_mem)(~(lu_mem)0)-2))
+static GCObject**sweeplist(lua_State*L,GCObject**p,lu_mem count){
+GCObject*curr;
+global_State*g=G(L);
+int deadmask=otherwhite(g);
+while((curr=*p)!=NULL&&count-->0){
+if(curr->gch.tt==8)
+sweepwholelist(L,&gco2th(curr)->openupval);
+if((curr->gch.marked^bit2mask(0,1))&deadmask){
+makewhite(g,curr);
+p=&curr->gch.next;
+}
+else{
+*p=curr->gch.next;
+if(curr==g->rootgc)
+g->rootgc=curr->gch.next;
+freeobj(L,curr);
+}
+}
+return p;
+}
+static void checkSizes(lua_State*L){
+global_State*g=G(L);
+if(g->strt.nuse<cast(lu_int32,g->strt.size/4)&&
+g->strt.size>32*2)
+luaS_resize(L,g->strt.size/2);
+if(luaZ_sizebuffer(&g->buff)>32*2){
+size_t newsize=luaZ_sizebuffer(&g->buff)/2;
+luaZ_resizebuffer(L,&g->buff,newsize);
+}
+}
+static void GCTM(lua_State*L){
+global_State*g=G(L);
+GCObject*o=g->tmudata->gch.next;
+Udata*udata=rawgco2u(o);
+const TValue*tm;
+if(o==g->tmudata)
+g->tmudata=NULL;
+else
+g->tmudata->gch.next=udata->uv.next;
+udata->uv.next=g->mainthread->next;
+g->mainthread->next=o;
+makewhite(g,o);
+tm=fasttm(L,udata->uv.metatable,TM_GC);
+if(tm!=NULL){
+lu_byte oldah=L->allowhook;
+lu_mem oldt=g->GCthreshold;
+L->allowhook=0;
+g->GCthreshold=2*g->totalbytes;
+setobj(L,L->top,tm);
+setuvalue(L,L->top+1,udata);
+L->top+=2;
+luaD_call(L,L->top-2,0);
+L->allowhook=oldah;
+g->GCthreshold=oldt;
+}
+}
+static void luaC_callGCTM(lua_State*L){
+while(G(L)->tmudata)
+GCTM(L);
+}
+static void luaC_freeall(lua_State*L){
+global_State*g=G(L);
+int i;
+g->currentwhite=bit2mask(0,1)|bitmask(6);
+sweepwholelist(L,&g->rootgc);
+for(i=0;i<g->strt.size;i++)
+sweepwholelist(L,&g->strt.hash[i]);
+}
+static void markmt(global_State*g){
+int i;
+for(i=0;i<(8+1);i++)
+if(g->mt[i])markobject(g,g->mt[i]);
+}
+static void markroot(lua_State*L){
+global_State*g=G(L);
+g->gray=NULL;
+g->grayagain=NULL;
+g->weak=NULL;
+markobject(g,g->mainthread);
+markvalue(g,gt(g->mainthread));
+markvalue(g,registry(L));
+markmt(g);
+g->gcstate=1;
+}
+static void remarkupvals(global_State*g){
+UpVal*uv;
+for(uv=g->uvhead.u.l.next;uv!=&g->uvhead;uv=uv->u.l.next){
+if(isgray(obj2gco(uv)))
+markvalue(g,uv->v);
+}
+}
+static void atomic(lua_State*L){
+global_State*g=G(L);
+size_t udsize;
+remarkupvals(g);
+propagateall(g);
+g->gray=g->weak;
+g->weak=NULL;
+markobject(g,L);
+markmt(g);
+propagateall(g);
+g->gray=g->grayagain;
+g->grayagain=NULL;
+propagateall(g);
+udsize=luaC_separateudata(L,0);
+marktmu(g);
+udsize+=propagateall(g);
+cleartable(g->weak);
+g->currentwhite=cast_byte(otherwhite(g));
+g->sweepstrgc=0;
+g->sweepgc=&g->rootgc;
+g->gcstate=2;
+g->estimate=g->totalbytes-udsize;
+}
+static l_mem singlestep(lua_State*L){
+global_State*g=G(L);
+switch(g->gcstate){
+case 0:{
+markroot(L);
+return 0;
+}
+case 1:{
+if(g->gray)
+return propagatemark(g);
+else{
+atomic(L);
+return 0;
+}
+}
+case 2:{
+lu_mem old=g->totalbytes;
+sweepwholelist(L,&g->strt.hash[g->sweepstrgc++]);
+if(g->sweepstrgc>=g->strt.size)
+g->gcstate=3;
+g->estimate-=old-g->totalbytes;
+return 10;
+}
+case 3:{
+lu_mem old=g->totalbytes;
+g->sweepgc=sweeplist(L,g->sweepgc,40);
+if(*g->sweepgc==NULL){
+checkSizes(L);
+g->gcstate=4;
+}
+g->estimate-=old-g->totalbytes;
+return 40*10;
+}
+case 4:{
+if(g->tmudata){
+GCTM(L);
+if(g->estimate>100)
+g->estimate-=100;
+return 100;
+}
+else{
+g->gcstate=0;
+g->gcdept=0;
+return 0;
+}
+}
+default:return 0;
+}
+}
+static void luaC_step(lua_State*L){
+global_State*g=G(L);
+l_mem lim=(1024u/100)*g->gcstepmul;
+if(lim==0)
+lim=(((lu_mem)(~(lu_mem)0)-2)-1)/2;
+g->gcdept+=g->totalbytes-g->GCthreshold;
+do{
+lim-=singlestep(L);
+if(g->gcstate==0)
+break;
+}while(lim>0);
+if(g->gcstate!=0){
+if(g->gcdept<1024u)
+g->GCthreshold=g->totalbytes+1024u;
+else{
+g->gcdept-=1024u;
+g->GCthreshold=g->totalbytes;
+}
+}
+else{
+setthreshold(g);
+}
+}
+static void luaC_barrierf(lua_State*L,GCObject*o,GCObject*v){
+global_State*g=G(L);
+if(g->gcstate==1)
+reallymarkobject(g,v);
+else
+makewhite(g,o);
+}
+static void luaC_barrierback(lua_State*L,Table*t){
+global_State*g=G(L);
+GCObject*o=obj2gco(t);
+black2gray(o);
+t->gclist=g->grayagain;
+g->grayagain=o;
+}
+static void luaC_link(lua_State*L,GCObject*o,lu_byte tt){
+global_State*g=G(L);
+o->gch.next=g->rootgc;
+g->rootgc=o;
+o->gch.marked=luaC_white(g);
+o->gch.tt=tt;
+}
+static void luaC_linkupval(lua_State*L,UpVal*uv){
+global_State*g=G(L);
+GCObject*o=obj2gco(uv);
+o->gch.next=g->rootgc;
+g->rootgc=o;
+if(isgray(o)){
+if(g->gcstate==1){
+gray2black(o);
+luaC_barrier(L,uv,uv->v);
+}
+else{
+makewhite(g,o);
+}
+}
+}
+typedef union{
+lua_Number r;
+TString*ts;
+}SemInfo;
+typedef struct Token{
+int token;
+SemInfo seminfo;
+}Token;
+typedef struct LexState{
+int current;
+int linenumber;
+int lastline;
+Token t;
+Token lookahead;
+struct FuncState*fs;
+struct lua_State*L;
+ZIO*z;
+Mbuffer*buff;
+TString*source;
+char decpoint;
+}LexState;
+static void luaX_init(lua_State*L);
+static void luaX_lexerror(LexState*ls,const char*msg,int token);
+#define state_size(x)(sizeof(x)+0)
+#define fromstate(l)(cast(lu_byte*,(l))-0)
+#define tostate(l)(cast(lua_State*,cast(lu_byte*,l)+0))
+typedef struct LG{
+lua_State l;
+global_State g;
+}LG;
+static void stack_init(lua_State*L1,lua_State*L){
+L1->base_ci=luaM_newvector(L,8,CallInfo);
+L1->ci=L1->base_ci;
+L1->size_ci=8;
+L1->end_ci=L1->base_ci+L1->size_ci-1;
+L1->stack=luaM_newvector(L,(2*20)+5,TValue);
+L1->stacksize=(2*20)+5;
+L1->top=L1->stack;
+L1->stack_last=L1->stack+(L1->stacksize-5)-1;
+L1->ci->func=L1->top;
+setnilvalue(L1->top++);
+L1->base=L1->ci->base=L1->top;
+L1->ci->top=L1->top+20;
+}
+static void freestack(lua_State*L,lua_State*L1){
+luaM_freearray(L,L1->base_ci,L1->size_ci,CallInfo);
+luaM_freearray(L,L1->stack,L1->stacksize,TValue);
+}
+static void f_luaopen(lua_State*L,void*ud){
+global_State*g=G(L);
+UNUSED(ud);
+stack_init(L,L);
+sethvalue(L,gt(L),luaH_new(L,0,2));
+sethvalue(L,registry(L),luaH_new(L,0,2));
+luaS_resize(L,32);
+luaT_init(L);
+luaX_init(L);
+luaS_fix(luaS_newliteral(L,"not enough memory"));
+g->GCthreshold=4*g->totalbytes;
+}
+static void preinit_state(lua_State*L,global_State*g){
+G(L)=g;
+L->stack=NULL;
+L->stacksize=0;
+L->errorJmp=NULL;
+L->hook=NULL;
+L->hookmask=0;
+L->basehookcount=0;
+L->allowhook=1;
+resethookcount(L);
+L->openupval=NULL;
+L->size_ci=0;
+L->nCcalls=L->baseCcalls=0;
+L->status=0;
+L->base_ci=L->ci=NULL;
+L->savedpc=NULL;
+L->errfunc=0;
+setnilvalue(gt(L));
+}
+static void close_state(lua_State*L){
+global_State*g=G(L);
+luaF_close(L,L->stack);
+luaC_freeall(L);
+luaM_freearray(L,G(L)->strt.hash,G(L)->strt.size,TString*);
+luaZ_freebuffer(L,&g->buff);
+freestack(L,L);
+(*g->frealloc)(g->ud,fromstate(L),state_size(LG),0);
+}
+static void luaE_freethread(lua_State*L,lua_State*L1){
+luaF_close(L1,L1->stack);
+freestack(L,L1);
+luaM_freemem(L,fromstate(L1),state_size(lua_State));
+}
+static lua_State*lua_newstate(lua_Alloc f,void*ud){
+int i;
+lua_State*L;
+global_State*g;
+void*l=(*f)(ud,NULL,0,state_size(LG));
+if(l==NULL)return NULL;
+L=tostate(l);
+g=&((LG*)L)->g;
+L->next=NULL;
+L->tt=8;
+g->currentwhite=bit2mask(0,5);
+L->marked=luaC_white(g);
+set2bits(L->marked,5,6);
+preinit_state(L,g);
+g->frealloc=f;
+g->ud=ud;
+g->mainthread=L;
+g->uvhead.u.l.prev=&g->uvhead;
+g->uvhead.u.l.next=&g->uvhead;
+g->GCthreshold=0;
+g->strt.size=0;
+g->strt.nuse=0;
+g->strt.hash=NULL;
+setnilvalue(registry(L));
+luaZ_initbuffer(L,&g->buff);
+g->panic=NULL;
+g->gcstate=0;
+g->rootgc=obj2gco(L);
+g->sweepstrgc=0;
+g->sweepgc=&g->rootgc;
+g->gray=NULL;
+g->grayagain=NULL;
+g->weak=NULL;
+g->tmudata=NULL;
+g->totalbytes=sizeof(LG);
+g->gcpause=200;
+g->gcstepmul=200;
+g->gcdept=0;
+for(i=0;i<(8+1);i++)g->mt[i]=NULL;
+if(luaD_rawrunprotected(L,f_luaopen,NULL)!=0){
+close_state(L);
+L=NULL;
+}
+else
+{}
+return L;
+}
+static void callallgcTM(lua_State*L,void*ud){
+UNUSED(ud);
+luaC_callGCTM(L);
+}
+static void lua_close(lua_State*L){
+L=G(L)->mainthread;
+luaF_close(L,L->stack);
+luaC_separateudata(L,1);
+L->errfunc=0;
+do{
+L->ci=L->base_ci;
+L->base=L->top=L->ci->base;
+L->nCcalls=L->baseCcalls=0;
+}while(luaD_rawrunprotected(L,callallgcTM,NULL)!=0);
+close_state(L);
+}
+#define getcode(fs,e)((fs)->f->code[(e)->u.s.info])
+#define luaK_codeAsBx(fs,o,A,sBx)luaK_codeABx(fs,o,A,(sBx)+(((1<<(9+9))-1)>>1))
+#define luaK_setmultret(fs,e)luaK_setreturns(fs,e,(-1))
+static int luaK_codeABx(FuncState*fs,OpCode o,int A,unsigned int Bx);
+static int luaK_codeABC(FuncState*fs,OpCode o,int A,int B,int C);
+static void luaK_setreturns(FuncState*fs,expdesc*e,int nresults);
+static void luaK_patchtohere(FuncState*fs,int list);
+static void luaK_concat(FuncState*fs,int*l1,int l2);
+static int currentpc(lua_State*L,CallInfo*ci){
+if(!isLua(ci))return-1;
+if(ci==L->ci)
+ci->savedpc=L->savedpc;
+return pcRel(ci->savedpc,ci_func(ci)->l.p);
+}
+static int currentline(lua_State*L,CallInfo*ci){
+int pc=currentpc(L,ci);
+if(pc<0)
+return-1;
+else
+return getline_(ci_func(ci)->l.p,pc);
+}
+static int lua_getstack(lua_State*L,int level,lua_Debug*ar){
+int status;
+CallInfo*ci;
+for(ci=L->ci;level>0&&ci>L->base_ci;ci--){
+level--;
+if(f_isLua(ci))
+level-=ci->tailcalls;
+}
+if(level==0&&ci>L->base_ci){
+status=1;
+ar->i_ci=cast_int(ci-L->base_ci);
+}
+else if(level<0){
+status=1;
+ar->i_ci=0;
+}
+else status=0;
+return status;
+}
+static Proto*getluaproto(CallInfo*ci){
+return(isLua(ci)?ci_func(ci)->l.p:NULL);
+}
+static void funcinfo(lua_Debug*ar,Closure*cl){
+if(cl->c.isC){
+ar->source="=[C]";
+ar->linedefined=-1;
+ar->lastlinedefined=-1;
+ar->what="C";
+}
+else{
+ar->source=getstr(cl->l.p->source);
+ar->linedefined=cl->l.p->linedefined;
+ar->lastlinedefined=cl->l.p->lastlinedefined;
+ar->what=(ar->linedefined==0)?"main":"Lua";
+}
+luaO_chunkid(ar->short_src,ar->source,60);
+}
+static void info_tailcall(lua_Debug*ar){
+ar->name=ar->namewhat="";
+ar->what="tail";
+ar->lastlinedefined=ar->linedefined=ar->currentline=-1;
+ar->source="=(tail call)";
+luaO_chunkid(ar->short_src,ar->source,60);
+ar->nups=0;
+}
+static void collectvalidlines(lua_State*L,Closure*f){
+if(f==NULL||f->c.isC){
+setnilvalue(L->top);
+}
+else{
+Table*t=luaH_new(L,0,0);
+int*lineinfo=f->l.p->lineinfo;
+int i;
+for(i=0;i<f->l.p->sizelineinfo;i++)
+setbvalue(luaH_setnum(L,t,lineinfo[i]),1);
+sethvalue(L,L->top,t);
+}
+incr_top(L);
+}
+static int auxgetinfo(lua_State*L,const char*what,lua_Debug*ar,
+Closure*f,CallInfo*ci){
+int status=1;
+if(f==NULL){
+info_tailcall(ar);
+return status;
+}
+for(;*what;what++){
+switch(*what){
+case'S':{
+funcinfo(ar,f);
+break;
+}
+case'l':{
+ar->currentline=(ci)?currentline(L,ci):-1;
+break;
+}
+case'u':{
+ar->nups=f->c.nupvalues;
+break;
+}
+case'n':{
+ar->namewhat=(ci)?NULL:NULL;
+if(ar->namewhat==NULL){
+ar->namewhat="";
+ar->name=NULL;
+}
+break;
+}
+case'L':
+case'f':
+break;
+default:status=0;
+}
+}
+return status;
+}
+static int lua_getinfo(lua_State*L,const char*what,lua_Debug*ar){
+int status;
+Closure*f=NULL;
+CallInfo*ci=NULL;
+if(*what=='>'){
+StkId func=L->top-1;
+luai_apicheck(L,ttisfunction(func));
+what++;
+f=clvalue(func);
+L->top--;
+}
+else if(ar->i_ci!=0){
+ci=L->base_ci+ar->i_ci;
+f=clvalue(ci->func);
+}
+status=auxgetinfo(L,what,ar,f,ci);
+if(strchr(what,'f')){
+if(f==NULL)setnilvalue(L->top);
+else setclvalue(L,L->top,f);
+incr_top(L);
+}
+if(strchr(what,'L'))
+collectvalidlines(L,f);
+return status;
+}
+static int isinstack(CallInfo*ci,const TValue*o){
+StkId p;
+for(p=ci->base;p<ci->top;p++)
+if(o==p)return 1;
+return 0;
+}
+static void luaG_typeerror(lua_State*L,const TValue*o,const char*op){
+const char*name=NULL;
+const char*t=luaT_typenames[ttype(o)];
+const char*kind=(isinstack(L->ci,o))?
+NULL:
+NULL;
+if(kind)
+luaG_runerror(L,"attempt to %s %s "LUA_QL("%s")" (a %s value)",
+op,kind,name,t);
+else
+luaG_runerror(L,"attempt to %s a %s value",op,t);
+}
+static void luaG_concaterror(lua_State*L,StkId p1,StkId p2){
+if(ttisstring(p1)||ttisnumber(p1))p1=p2;
+luaG_typeerror(L,p1,"concatenate");
+}
+static void luaG_aritherror(lua_State*L,const TValue*p1,const TValue*p2){
+TValue temp;
+if(luaV_tonumber(p1,&temp)==NULL)
+p2=p1;
+luaG_typeerror(L,p2,"perform arithmetic on");
+}
+static int luaG_ordererror(lua_State*L,const TValue*p1,const TValue*p2){
+const char*t1=luaT_typenames[ttype(p1)];
+const char*t2=luaT_typenames[ttype(p2)];
+if(t1[2]==t2[2])
+luaG_runerror(L,"attempt to compare two %s values",t1);
+else
+luaG_runerror(L,"attempt to compare %s with %s",t1,t2);
+return 0;
+}
+static void addinfo(lua_State*L,const char*msg){
+CallInfo*ci=L->ci;
+if(isLua(ci)){
+char buff[60];
+int line=currentline(L,ci);
+luaO_chunkid(buff,getstr(getluaproto(ci)->source),60);
+luaO_pushfstring(L,"%s:%d: %s",buff,line,msg);
+}
+}
+static void luaG_errormsg(lua_State*L){
+if(L->errfunc!=0){
+StkId errfunc=restorestack(L,L->errfunc);
+if(!ttisfunction(errfunc))luaD_throw(L,5);
+setobj(L,L->top,L->top-1);
+setobj(L,L->top-1,errfunc);
+incr_top(L);
+luaD_call(L,L->top-2,1);
+}
+luaD_throw(L,2);
+}
+static void luaG_runerror(lua_State*L,const char*fmt,...){
+va_list argp;
+va_start(argp,fmt);
+addinfo(L,luaO_pushvfstring(L,fmt,argp));
+va_end(argp);
+luaG_errormsg(L);
+}
+static int luaZ_fill(ZIO*z){
+size_t size;
+lua_State*L=z->L;
+const char*buff;
+buff=z->reader(L,z->data,&size);
+if(buff==NULL||size==0)return(-1);
+z->n=size-1;
+z->p=buff;
+return char2int(*(z->p++));
+}
+static void luaZ_init(lua_State*L,ZIO*z,lua_Reader reader,void*data){
+z->L=L;
+z->reader=reader;
+z->data=data;
+z->n=0;
+z->p=NULL;
+}
+static char*luaZ_openspace(lua_State*L,Mbuffer*buff,size_t n){
+if(n>buff->buffsize){
+if(n<32)n=32;
+luaZ_resizebuffer(L,buff,n);
+}
+return buff->buffer;
+}
+#define opmode(t,a,b,c,m)(((t)<<7)|((a)<<6)|((b)<<4)|((c)<<2)|(m))
+static const lu_byte luaP_opmodes[(cast(int,OP_VARARG)+1)]={
+opmode(0,1,OpArgR,OpArgN,iABC)
+,opmode(0,1,OpArgK,OpArgN,iABx)
+,opmode(0,1,OpArgU,OpArgU,iABC)
+,opmode(0,1,OpArgR,OpArgN,iABC)
+,opmode(0,1,OpArgU,OpArgN,iABC)
+,opmode(0,1,OpArgK,OpArgN,iABx)
+,opmode(0,1,OpArgR,OpArgK,iABC)
+,opmode(0,0,OpArgK,OpArgN,iABx)
+,opmode(0,0,OpArgU,OpArgN,iABC)
+,opmode(0,0,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgU,OpArgU,iABC)
+,opmode(0,1,OpArgR,OpArgK,iABC)
+,opmode(0,1,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgK,OpArgK,iABC)
+,opmode(0,1,OpArgR,OpArgN,iABC)
+,opmode(0,1,OpArgR,OpArgN,iABC)
+,opmode(0,1,OpArgR,OpArgN,iABC)
+,opmode(0,1,OpArgR,OpArgR,iABC)
+,opmode(0,0,OpArgR,OpArgN,iAsBx)
+,opmode(1,0,OpArgK,OpArgK,iABC)
+,opmode(1,0,OpArgK,OpArgK,iABC)
+,opmode(1,0,OpArgK,OpArgK,iABC)
+,opmode(1,1,OpArgR,OpArgU,iABC)
+,opmode(1,1,OpArgR,OpArgU,iABC)
+,opmode(0,1,OpArgU,OpArgU,iABC)
+,opmode(0,1,OpArgU,OpArgU,iABC)
+,opmode(0,0,OpArgU,OpArgN,iABC)
+,opmode(0,1,OpArgR,OpArgN,iAsBx)
+,opmode(0,1,OpArgR,OpArgN,iAsBx)
+,opmode(1,0,OpArgN,OpArgU,iABC)
+,opmode(0,0,OpArgU,OpArgU,iABC)
+,opmode(0,0,OpArgN,OpArgN,iABC)
+,opmode(0,1,OpArgU,OpArgN,iABx)
+,opmode(0,1,OpArgU,OpArgN,iABC)
+};
+#define next(ls)(ls->current=zgetc(ls->z))
+#define currIsNewline(ls)(ls->current=='\n'||ls->current=='\r')
+static const char*const luaX_tokens[]={
+"and","break","do","else","elseif",
+"end","false","for","function","if",
+"in","local","nil","not","or","repeat",
+"return","then","true","until","while",
+"..","...","==",">=","<=","~=",
+"<number>","<name>","<string>","<eof>",
+NULL
+};
+#define save_and_next(ls)(save(ls,ls->current),next(ls))
+static void save(LexState*ls,int c){
+Mbuffer*b=ls->buff;
+if(b->n+1>b->buffsize){
+size_t newsize;
+if(b->buffsize>=((size_t)(~(size_t)0)-2)/2)
+luaX_lexerror(ls,"lexical element too long",0);
+newsize=b->buffsize*2;
+luaZ_resizebuffer(ls->L,b,newsize);
+}
+b->buffer[b->n++]=cast(char,c);
+}
+static void luaX_init(lua_State*L){
+int i;
+for(i=0;i<(cast(int,TK_WHILE-257+1));i++){
+TString*ts=luaS_new(L,luaX_tokens[i]);
+luaS_fix(ts);
+ts->tsv.reserved=cast_byte(i+1);
+}
+}
+static const char*luaX_token2str(LexState*ls,int token){
+if(token<257){
+return(iscntrl(token))?luaO_pushfstring(ls->L,"char(%d)",token):
+luaO_pushfstring(ls->L,"%c",token);
+}
+else
+return luaX_tokens[token-257];
+}
+static const char*txtToken(LexState*ls,int token){
+switch(token){
+case TK_NAME:
+case TK_STRING:
+case TK_NUMBER:
+save(ls,'\0');
+return luaZ_buffer(ls->buff);
+default:
+return luaX_token2str(ls,token);
+}
+}
+static void luaX_lexerror(LexState*ls,const char*msg,int token){
+char buff[80];
+luaO_chunkid(buff,getstr(ls->source),80);
+msg=luaO_pushfstring(ls->L,"%s:%d: %s",buff,ls->linenumber,msg);
+if(token)
+luaO_pushfstring(ls->L,"%s near "LUA_QL("%s"),msg,txtToken(ls,token));
+luaD_throw(ls->L,3);
+}
+static void luaX_syntaxerror(LexState*ls,const char*msg){
+luaX_lexerror(ls,msg,ls->t.token);
+}
+static TString*luaX_newstring(LexState*ls,const char*str,size_t l){
+lua_State*L=ls->L;
+TString*ts=luaS_newlstr(L,str,l);
+TValue*o=luaH_setstr(L,ls->fs->h,ts);
+if(ttisnil(o)){
+setbvalue(o,1);
+luaC_checkGC(L);
+}
+return ts;
+}
+static void inclinenumber(LexState*ls){
+int old=ls->current;
+next(ls);
+if(currIsNewline(ls)&&ls->current!=old)
+next(ls);
+if(++ls->linenumber>=(INT_MAX-2))
+luaX_syntaxerror(ls,"chunk has too many lines");
+}
+static void luaX_setinput(lua_State*L,LexState*ls,ZIO*z,TString*source){
+ls->decpoint='.';
+ls->L=L;
+ls->lookahead.token=TK_EOS;
+ls->z=z;
+ls->fs=NULL;
+ls->linenumber=1;
+ls->lastline=1;
+ls->source=source;
+luaZ_resizebuffer(ls->L,ls->buff,32);
+next(ls);
+}
+static int check_next(LexState*ls,const char*set){
+if(!strchr(set,ls->current))
+return 0;
+save_and_next(ls);
+return 1;
+}
+static void buffreplace(LexState*ls,char from,char to){
+size_t n=luaZ_bufflen(ls->buff);
+char*p=luaZ_buffer(ls->buff);
+while(n--)
+if(p[n]==from)p[n]=to;
+}
+static void read_numeral(LexState*ls,SemInfo*seminfo){
+do{
+save_and_next(ls);
+}while(isdigit(ls->current)||ls->current=='.');
+if(check_next(ls,"Ee"))
+check_next(ls,"+-");
+while(isalnum(ls->current)||ls->current=='_')
+save_and_next(ls);
+save(ls,'\0');
+buffreplace(ls,'.',ls->decpoint);
+if(!luaO_str2d(luaZ_buffer(ls->buff),&seminfo->r))
+luaX_lexerror(ls,"malformed number",TK_NUMBER);
+}
+static int skip_sep(LexState*ls){
+int count=0;
+int s=ls->current;
+save_and_next(ls);
+while(ls->current=='='){
+save_and_next(ls);
+count++;
+}
+return(ls->current==s)?count:(-count)-1;
+}
+static void read_long_string(LexState*ls,SemInfo*seminfo,int sep){
+int cont=0;
+(void)(cont);
+save_and_next(ls);
+if(currIsNewline(ls))
+inclinenumber(ls);
+for(;;){
+switch(ls->current){
+case(-1):
+luaX_lexerror(ls,(seminfo)?"unfinished long string":
+"unfinished long comment",TK_EOS);
+break;
+case']':{
+if(skip_sep(ls)==sep){
+save_and_next(ls);
+goto endloop;
+}
+break;
+}
+case'\n':
+case'\r':{
+save(ls,'\n');
+inclinenumber(ls);
+if(!seminfo)luaZ_resetbuffer(ls->buff);
+break;
+}
+default:{
+if(seminfo)save_and_next(ls);
+else next(ls);
+}
+}
+}endloop:
+if(seminfo)
+seminfo->ts=luaX_newstring(ls,luaZ_buffer(ls->buff)+(2+sep),
+luaZ_bufflen(ls->buff)-2*(2+sep));
+}
+static void read_string(LexState*ls,int del,SemInfo*seminfo){
+save_and_next(ls);
+while(ls->current!=del){
+switch(ls->current){
+case(-1):
+luaX_lexerror(ls,"unfinished string",TK_EOS);
+continue;
+case'\n':
+case'\r':
+luaX_lexerror(ls,"unfinished string",TK_STRING);
+continue;
+case'\\':{
+int c;
+next(ls);
+switch(ls->current){
+case'a':c='\a';break;
+case'b':c='\b';break;
+case'f':c='\f';break;
+case'n':c='\n';break;
+case'r':c='\r';break;
+case't':c='\t';break;
+case'v':c='\v';break;
+case'\n':
+case'\r':save(ls,'\n');inclinenumber(ls);continue;
+case(-1):continue;
+default:{
+if(!isdigit(ls->current))
+save_and_next(ls);
+else{
+int i=0;
+c=0;
+do{
+c=10*c+(ls->current-'0');
+next(ls);
+}while(++i<3&&isdigit(ls->current));
+if(c>UCHAR_MAX)
+luaX_lexerror(ls,"escape sequence too large",TK_STRING);
+save(ls,c);
+}
+continue;
+}
+}
+save(ls,c);
+next(ls);
+continue;
+}
+default:
+save_and_next(ls);
+}
+}
+save_and_next(ls);
+seminfo->ts=luaX_newstring(ls,luaZ_buffer(ls->buff)+1,
+luaZ_bufflen(ls->buff)-2);
+}
+static int llex(LexState*ls,SemInfo*seminfo){
+luaZ_resetbuffer(ls->buff);
+for(;;){
+switch(ls->current){
+case'\n':
+case'\r':{
+inclinenumber(ls);
+continue;
+}
+case'-':{
+next(ls);
+if(ls->current!='-')return'-';
+next(ls);
+if(ls->current=='['){
+int sep=skip_sep(ls);
+luaZ_resetbuffer(ls->buff);
+if(sep>=0){
+read_long_string(ls,NULL,sep);
+luaZ_resetbuffer(ls->buff);
+continue;
+}
+}
+while(!currIsNewline(ls)&&ls->current!=(-1))
+next(ls);
+continue;
+}
+case'[':{
+int sep=skip_sep(ls);
+if(sep>=0){
+read_long_string(ls,seminfo,sep);
+return TK_STRING;
+}
+else if(sep==-1)return'[';
+else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
+}
+case'=':{
+next(ls);
+if(ls->current!='=')return'=';
+else{next(ls);return TK_EQ;}
+}
+case'<':{
+next(ls);
+if(ls->current!='=')return'<';
+else{next(ls);return TK_LE;}
+}
+case'>':{
+next(ls);
+if(ls->current!='=')return'>';
+else{next(ls);return TK_GE;}
+}
+case'~':{
+next(ls);
+if(ls->current!='=')return'~';
+else{next(ls);return TK_NE;}
+}
+case'"':
+case'\'':{
+read_string(ls,ls->current,seminfo);
+return TK_STRING;
+}
+case'.':{
+save_and_next(ls);
+if(check_next(ls,".")){
+if(check_next(ls,"."))
+return TK_DOTS;
+else return TK_CONCAT;
+}
+else if(!isdigit(ls->current))return'.';
+else{
+read_numeral(ls,seminfo);
+return TK_NUMBER;
+}
+}
+case(-1):{
+return TK_EOS;
+}
+default:{
+if(isspace(ls->current)){
+next(ls);
+continue;
+}
+else if(isdigit(ls->current)){
+read_numeral(ls,seminfo);
+return TK_NUMBER;
+}
+else if(isalpha(ls->current)||ls->current=='_'){
+TString*ts;
+do{
+save_and_next(ls);
+}while(isalnum(ls->current)||ls->current=='_');
+ts=luaX_newstring(ls,luaZ_buffer(ls->buff),
+luaZ_bufflen(ls->buff));
+if(ts->tsv.reserved>0)
+return ts->tsv.reserved-1+257;
+else{
+seminfo->ts=ts;
+return TK_NAME;
+}
+}
+else{
+int c=ls->current;
+next(ls);
+return c;
+}
+}
+}
+}
+}
+static void luaX_next(LexState*ls){
+ls->lastline=ls->linenumber;
+if(ls->lookahead.token!=TK_EOS){
+ls->t=ls->lookahead;
+ls->lookahead.token=TK_EOS;
+}
+else
+ls->t.token=llex(ls,&ls->t.seminfo);
+}
+static void luaX_lookahead(LexState*ls){
+ls->lookahead.token=llex(ls,&ls->lookahead.seminfo);
+}
+#define hasjumps(e)((e)->t!=(e)->f)
+static int isnumeral(expdesc*e){
+return(e->k==VKNUM&&e->t==(-1)&&e->f==(-1));
+}
+static void luaK_nil(FuncState*fs,int from,int n){
+Instruction*previous;
+if(fs->pc>fs->lasttarget){
+if(fs->pc==0){
+if(from>=fs->nactvar)
+return;
+}
+else{
+previous=&fs->f->code[fs->pc-1];
+if(GET_OPCODE(*previous)==OP_LOADNIL){
+int pfrom=GETARG_A(*previous);
+int pto=GETARG_B(*previous);
+if(pfrom<=from&&from<=pto+1){
+if(from+n-1>pto)
+SETARG_B(*previous,from+n-1);
+return;
+}
+}
+}
+}
+luaK_codeABC(fs,OP_LOADNIL,from,from+n-1,0);
+}
+static int luaK_jump(FuncState*fs){
+int jpc=fs->jpc;
+int j;
+fs->jpc=(-1);
+j=luaK_codeAsBx(fs,OP_JMP,0,(-1));
+luaK_concat(fs,&j,jpc);
+return j;
+}
+static void luaK_ret(FuncState*fs,int first,int nret){
+luaK_codeABC(fs,OP_RETURN,first,nret+1,0);
+}
+static int condjump(FuncState*fs,OpCode op,int A,int B,int C){
+luaK_codeABC(fs,op,A,B,C);
+return luaK_jump(fs);
+}
+static void fixjump(FuncState*fs,int pc,int dest){
+Instruction*jmp=&fs->f->code[pc];
+int offset=dest-(pc+1);
+if(abs(offset)>(((1<<(9+9))-1)>>1))
+luaX_syntaxerror(fs->ls,"control structure too long");
+SETARG_sBx(*jmp,offset);
+}
+static int luaK_getlabel(FuncState*fs){
+fs->lasttarget=fs->pc;
+return fs->pc;
+}
+static int getjump(FuncState*fs,int pc){
+int offset=GETARG_sBx(fs->f->code[pc]);
+if(offset==(-1))
+return(-1);
+else
+return(pc+1)+offset;
+}
+static Instruction*getjumpcontrol(FuncState*fs,int pc){
+Instruction*pi=&fs->f->code[pc];
+if(pc>=1&&testTMode(GET_OPCODE(*(pi-1))))
+return pi-1;
+else
+return pi;
+}
+static int need_value(FuncState*fs,int list){
+for(;list!=(-1);list=getjump(fs,list)){
+Instruction i=*getjumpcontrol(fs,list);
+if(GET_OPCODE(i)!=OP_TESTSET)return 1;
+}
+return 0;
+}
+static int patchtestreg(FuncState*fs,int node,int reg){
+Instruction*i=getjumpcontrol(fs,node);
+if(GET_OPCODE(*i)!=OP_TESTSET)
+return 0;
+if(reg!=((1<<8)-1)&&reg!=GETARG_B(*i))
+SETARG_A(*i,reg);
+else
+*i=CREATE_ABC(OP_TEST,GETARG_B(*i),0,GETARG_C(*i));
+return 1;
+}
+static void removevalues(FuncState*fs,int list){
+for(;list!=(-1);list=getjump(fs,list))
+patchtestreg(fs,list,((1<<8)-1));
+}
+static void patchlistaux(FuncState*fs,int list,int vtarget,int reg,
+int dtarget){
+while(list!=(-1)){
+int next=getjump(fs,list);
+if(patchtestreg(fs,list,reg))
+fixjump(fs,list,vtarget);
+else
+fixjump(fs,list,dtarget);
+list=next;
+}
+}
+static void dischargejpc(FuncState*fs){
+patchlistaux(fs,fs->jpc,fs->pc,((1<<8)-1),fs->pc);
+fs->jpc=(-1);
+}
+static void luaK_patchlist(FuncState*fs,int list,int target){
+if(target==fs->pc)
+luaK_patchtohere(fs,list);
+else{
+patchlistaux(fs,list,target,((1<<8)-1),target);
+}
+}
+static void luaK_patchtohere(FuncState*fs,int list){
+luaK_getlabel(fs);
+luaK_concat(fs,&fs->jpc,list);
+}
+static void luaK_concat(FuncState*fs,int*l1,int l2){
+if(l2==(-1))return;
+else if(*l1==(-1))
+*l1=l2;
+else{
+int list=*l1;
+int next;
+while((next=getjump(fs,list))!=(-1))
+list=next;
+fixjump(fs,list,l2);
+}
+}
+static void luaK_checkstack(FuncState*fs,int n){
+int newstack=fs->freereg+n;
+if(newstack>fs->f->maxstacksize){
+if(newstack>=250)
+luaX_syntaxerror(fs->ls,"function or expression too complex");
+fs->f->maxstacksize=cast_byte(newstack);
+}
+}
+static void luaK_reserveregs(FuncState*fs,int n){
+luaK_checkstack(fs,n);
+fs->freereg+=n;
+}
+static void freereg(FuncState*fs,int reg){
+if(!ISK(reg)&&reg>=fs->nactvar){
+fs->freereg--;
+}
+}
+static void freeexp(FuncState*fs,expdesc*e){
+if(e->k==VNONRELOC)
+freereg(fs,e->u.s.info);
+}
+static int addk(FuncState*fs,TValue*k,TValue*v){
+lua_State*L=fs->L;
+TValue*idx=luaH_set(L,fs->h,k);
+Proto*f=fs->f;
+int oldsize=f->sizek;
+if(ttisnumber(idx)){
+return cast_int(nvalue(idx));
+}
+else{
+setnvalue(idx,cast_num(fs->nk));
+luaM_growvector(L,f->k,fs->nk,f->sizek,TValue,
+((1<<(9+9))-1),"constant table overflow");
+while(oldsize<f->sizek)setnilvalue(&f->k[oldsize++]);
+setobj(L,&f->k[fs->nk],v);
+luaC_barrier(L,f,v);
+return fs->nk++;
+}
+}
+static int luaK_stringK(FuncState*fs,TString*s){
+TValue o;
+setsvalue(fs->L,&o,s);
+return addk(fs,&o,&o);
+}
+static int luaK_numberK(FuncState*fs,lua_Number r){
+TValue o;
+setnvalue(&o,r);
+return addk(fs,&o,&o);
+}
+static int boolK(FuncState*fs,int b){
+TValue o;
+setbvalue(&o,b);
+return addk(fs,&o,&o);
+}
+static int nilK(FuncState*fs){
+TValue k,v;
+setnilvalue(&v);
+sethvalue(fs->L,&k,fs->h);
+return addk(fs,&k,&v);
+}
+static void luaK_setreturns(FuncState*fs,expdesc*e,int nresults){
+if(e->k==VCALL){
+SETARG_C(getcode(fs,e),nresults+1);
+}
+else if(e->k==VVARARG){
+SETARG_B(getcode(fs,e),nresults+1);
+SETARG_A(getcode(fs,e),fs->freereg);
+luaK_reserveregs(fs,1);
+}
+}
+static void luaK_setoneret(FuncState*fs,expdesc*e){
+if(e->k==VCALL){
+e->k=VNONRELOC;
+e->u.s.info=GETARG_A(getcode(fs,e));
+}
+else if(e->k==VVARARG){
+SETARG_B(getcode(fs,e),2);
+e->k=VRELOCABLE;
+}
+}
+static void luaK_dischargevars(FuncState*fs,expdesc*e){
+switch(e->k){
+case VLOCAL:{
+e->k=VNONRELOC;
+break;
+}
+case VUPVAL:{
+e->u.s.info=luaK_codeABC(fs,OP_GETUPVAL,0,e->u.s.info,0);
+e->k=VRELOCABLE;
+break;
+}
+case VGLOBAL:{
+e->u.s.info=luaK_codeABx(fs,OP_GETGLOBAL,0,e->u.s.info);
+e->k=VRELOCABLE;
+break;
+}
+case VINDEXED:{
+freereg(fs,e->u.s.aux);
+freereg(fs,e->u.s.info);
+e->u.s.info=luaK_codeABC(fs,OP_GETTABLE,0,e->u.s.info,e->u.s.aux);
+e->k=VRELOCABLE;
+break;
+}
+case VVARARG:
+case VCALL:{
+luaK_setoneret(fs,e);
+break;
+}
+default:break;
+}
+}
+static int code_label(FuncState*fs,int A,int b,int jump){
+luaK_getlabel(fs);
+return luaK_codeABC(fs,OP_LOADBOOL,A,b,jump);
+}
+static void discharge2reg(FuncState*fs,expdesc*e,int reg){
+luaK_dischargevars(fs,e);
+switch(e->k){
+case VNIL:{
+luaK_nil(fs,reg,1);
+break;
+}
+case VFALSE:case VTRUE:{
+luaK_codeABC(fs,OP_LOADBOOL,reg,e->k==VTRUE,0);
+break;
+}
+case VK:{
+luaK_codeABx(fs,OP_LOADK,reg,e->u.s.info);
+break;
+}
+case VKNUM:{
+luaK_codeABx(fs,OP_LOADK,reg,luaK_numberK(fs,e->u.nval));
+break;
+}
+case VRELOCABLE:{
+Instruction*pc=&getcode(fs,e);
+SETARG_A(*pc,reg);
+break;
+}
+case VNONRELOC:{
+if(reg!=e->u.s.info)
+luaK_codeABC(fs,OP_MOVE,reg,e->u.s.info,0);
+break;
+}
+default:{
+return;
+}
+}
+e->u.s.info=reg;
+e->k=VNONRELOC;
+}
+static void discharge2anyreg(FuncState*fs,expdesc*e){
+if(e->k!=VNONRELOC){
+luaK_reserveregs(fs,1);
+discharge2reg(fs,e,fs->freereg-1);
+}
+}
+static void exp2reg(FuncState*fs,expdesc*e,int reg){
+discharge2reg(fs,e,reg);
+if(e->k==VJMP)
+luaK_concat(fs,&e->t,e->u.s.info);
+if(hasjumps(e)){
+int final;
+int p_f=(-1);
+int p_t=(-1);
+if(need_value(fs,e->t)||need_value(fs,e->f)){
+int fj=(e->k==VJMP)?(-1):luaK_jump(fs);
+p_f=code_label(fs,reg,0,1);
+p_t=code_label(fs,reg,1,0);
+luaK_patchtohere(fs,fj);
+}
+final=luaK_getlabel(fs);
+patchlistaux(fs,e->f,final,reg,p_f);
+patchlistaux(fs,e->t,final,reg,p_t);
+}
+e->f=e->t=(-1);
+e->u.s.info=reg;
+e->k=VNONRELOC;
+}
+static void luaK_exp2nextreg(FuncState*fs,expdesc*e){
+luaK_dischargevars(fs,e);
+freeexp(fs,e);
+luaK_reserveregs(fs,1);
+exp2reg(fs,e,fs->freereg-1);
+}
+static int luaK_exp2anyreg(FuncState*fs,expdesc*e){
+luaK_dischargevars(fs,e);
+if(e->k==VNONRELOC){
+if(!hasjumps(e))return e->u.s.info;
+if(e->u.s.info>=fs->nactvar){
+exp2reg(fs,e,e->u.s.info);
+return e->u.s.info;
+}
+}
+luaK_exp2nextreg(fs,e);
+return e->u.s.info;
+}
+static void luaK_exp2val(FuncState*fs,expdesc*e){
+if(hasjumps(e))
+luaK_exp2anyreg(fs,e);
+else
+luaK_dischargevars(fs,e);
+}
+static int luaK_exp2RK(FuncState*fs,expdesc*e){
+luaK_exp2val(fs,e);
+switch(e->k){
+case VKNUM:
+case VTRUE:
+case VFALSE:
+case VNIL:{
+if(fs->nk<=((1<<(9-1))-1)){
+e->u.s.info=(e->k==VNIL)?nilK(fs):
+(e->k==VKNUM)?luaK_numberK(fs,e->u.nval):
+boolK(fs,(e->k==VTRUE));
+e->k=VK;
+return RKASK(e->u.s.info);
+}
+else break;
+}
+case VK:{
+if(e->u.s.info<=((1<<(9-1))-1))
+return RKASK(e->u.s.info);
+else break;
+}
+default:break;
+}
+return luaK_exp2anyreg(fs,e);
+}
+static void luaK_storevar(FuncState*fs,expdesc*var,expdesc*ex){
+switch(var->k){
+case VLOCAL:{
+freeexp(fs,ex);
+exp2reg(fs,ex,var->u.s.info);
+return;
+}
+case VUPVAL:{
+int e=luaK_exp2anyreg(fs,ex);
+luaK_codeABC(fs,OP_SETUPVAL,e,var->u.s.info,0);
+break;
+}
+case VGLOBAL:{
+int e=luaK_exp2anyreg(fs,ex);
+luaK_codeABx(fs,OP_SETGLOBAL,e,var->u.s.info);
+break;
+}
+case VINDEXED:{
+int e=luaK_exp2RK(fs,ex);
+luaK_codeABC(fs,OP_SETTABLE,var->u.s.info,var->u.s.aux,e);
+break;
+}
+default:{
+break;
+}
+}
+freeexp(fs,ex);
+}
+static void luaK_self(FuncState*fs,expdesc*e,expdesc*key){
+int func;
+luaK_exp2anyreg(fs,e);
+freeexp(fs,e);
+func=fs->freereg;
+luaK_reserveregs(fs,2);
+luaK_codeABC(fs,OP_SELF,func,e->u.s.info,luaK_exp2RK(fs,key));
+freeexp(fs,key);
+e->u.s.info=func;
+e->k=VNONRELOC;
+}
+static void invertjump(FuncState*fs,expdesc*e){
+Instruction*pc=getjumpcontrol(fs,e->u.s.info);
+SETARG_A(*pc,!(GETARG_A(*pc)));
+}
+static int jumponcond(FuncState*fs,expdesc*e,int cond){
+if(e->k==VRELOCABLE){
+Instruction ie=getcode(fs,e);
+if(GET_OPCODE(ie)==OP_NOT){
+fs->pc--;
+return condjump(fs,OP_TEST,GETARG_B(ie),0,!cond);
+}
+}
+discharge2anyreg(fs,e);
+freeexp(fs,e);
+return condjump(fs,OP_TESTSET,((1<<8)-1),e->u.s.info,cond);
+}
+static void luaK_goiftrue(FuncState*fs,expdesc*e){
+int pc;
+luaK_dischargevars(fs,e);
+switch(e->k){
+case VK:case VKNUM:case VTRUE:{
+pc=(-1);
+break;
+}
+case VJMP:{
+invertjump(fs,e);
+pc=e->u.s.info;
+break;
+}
+default:{
+pc=jumponcond(fs,e,0);
+break;
+}
+}
+luaK_concat(fs,&e->f,pc);
+luaK_patchtohere(fs,e->t);
+e->t=(-1);
+}
+static void luaK_goiffalse(FuncState*fs,expdesc*e){
+int pc;
+luaK_dischargevars(fs,e);
+switch(e->k){
+case VNIL:case VFALSE:{
+pc=(-1);
+break;
+}
+case VJMP:{
+pc=e->u.s.info;
+break;
+}
+default:{
+pc=jumponcond(fs,e,1);
+break;
+}
+}
+luaK_concat(fs,&e->t,pc);
+luaK_patchtohere(fs,e->f);
+e->f=(-1);
+}
+static void codenot(FuncState*fs,expdesc*e){
+luaK_dischargevars(fs,e);
+switch(e->k){
+case VNIL:case VFALSE:{
+e->k=VTRUE;
+break;
+}
+case VK:case VKNUM:case VTRUE:{
+e->k=VFALSE;
+break;
+}
+case VJMP:{
+invertjump(fs,e);
+break;
+}
+case VRELOCABLE:
+case VNONRELOC:{
+discharge2anyreg(fs,e);
+freeexp(fs,e);
+e->u.s.info=luaK_codeABC(fs,OP_NOT,0,e->u.s.info,0);
+e->k=VRELOCABLE;
+break;
+}
+default:{
+break;
+}
+}
+{int temp=e->f;e->f=e->t;e->t=temp;}
+removevalues(fs,e->f);
+removevalues(fs,e->t);
+}
+static void luaK_indexed(FuncState*fs,expdesc*t,expdesc*k){
+t->u.s.aux=luaK_exp2RK(fs,k);
+t->k=VINDEXED;
+}
+static int constfolding(OpCode op,expdesc*e1,expdesc*e2){
+lua_Number v1,v2,r;
+if(!isnumeral(e1)||!isnumeral(e2))return 0;
+v1=e1->u.nval;
+v2=e2->u.nval;
+switch(op){
+case OP_ADD:r=luai_numadd(v1,v2);break;
+case OP_SUB:r=luai_numsub(v1,v2);break;
+case OP_MUL:r=luai_nummul(v1,v2);break;
+case OP_DIV:
+if(v2==0)return 0;
+r=luai_numdiv(v1,v2);break;
+case OP_MOD:
+if(v2==0)return 0;
+r=luai_nummod(v1,v2);break;
+case OP_POW:r=luai_numpow(v1,v2);break;
+case OP_UNM:r=luai_numunm(v1);break;
+case OP_LEN:return 0;
+default:r=0;break;
+}
+if(luai_numisnan(r))return 0;
+e1->u.nval=r;
+return 1;
+}
+static void codearith(FuncState*fs,OpCode op,expdesc*e1,expdesc*e2){
+if(constfolding(op,e1,e2))
+return;
+else{
+int o2=(op!=OP_UNM&&op!=OP_LEN)?luaK_exp2RK(fs,e2):0;
+int o1=luaK_exp2RK(fs,e1);
+if(o1>o2){
+freeexp(fs,e1);
+freeexp(fs,e2);
+}
+else{
+freeexp(fs,e2);
+freeexp(fs,e1);
+}
+e1->u.s.info=luaK_codeABC(fs,op,0,o1,o2);
+e1->k=VRELOCABLE;
+}
+}
+static void codecomp(FuncState*fs,OpCode op,int cond,expdesc*e1,
+expdesc*e2){
+int o1=luaK_exp2RK(fs,e1);
+int o2=luaK_exp2RK(fs,e2);
+freeexp(fs,e2);
+freeexp(fs,e1);
+if(cond==0&&op!=OP_EQ){
+int temp;
+temp=o1;o1=o2;o2=temp;
+cond=1;
+}
+e1->u.s.info=condjump(fs,op,cond,o1,o2);
+e1->k=VJMP;
+}
+static void luaK_prefix(FuncState*fs,UnOpr op,expdesc*e){
+expdesc e2;
+e2.t=e2.f=(-1);e2.k=VKNUM;e2.u.nval=0;
+switch(op){
+case OPR_MINUS:{
+if(!isnumeral(e))
+luaK_exp2anyreg(fs,e);
+codearith(fs,OP_UNM,e,&e2);
+break;
+}
+case OPR_NOT:codenot(fs,e);break;
+case OPR_LEN:{
+luaK_exp2anyreg(fs,e);
+codearith(fs,OP_LEN,e,&e2);
+break;
+}
+default:;
+}
+}
+static void luaK_infix(FuncState*fs,BinOpr op,expdesc*v){
+switch(op){
+case OPR_AND:{
+luaK_goiftrue(fs,v);
+break;
+}
+case OPR_OR:{
+luaK_goiffalse(fs,v);
+break;
+}
+case OPR_CONCAT:{
+luaK_exp2nextreg(fs,v);
+break;
+}
+case OPR_ADD:case OPR_SUB:case OPR_MUL:case OPR_DIV:
+case OPR_MOD:case OPR_POW:{
+if(!isnumeral(v))luaK_exp2RK(fs,v);
+break;
+}
+default:{
+luaK_exp2RK(fs,v);
+break;
+}
+}
+}
+static void luaK_posfix(FuncState*fs,BinOpr op,expdesc*e1,expdesc*e2){
+switch(op){
+case OPR_AND:{
+luaK_dischargevars(fs,e2);
+luaK_concat(fs,&e2->f,e1->f);
+*e1=*e2;
+break;
+}
+case OPR_OR:{
+luaK_dischargevars(fs,e2);
+luaK_concat(fs,&e2->t,e1->t);
+*e1=*e2;
+break;
+}
+case OPR_CONCAT:{
+luaK_exp2val(fs,e2);
+if(e2->k==VRELOCABLE&&GET_OPCODE(getcode(fs,e2))==OP_CONCAT){
+freeexp(fs,e1);
+SETARG_B(getcode(fs,e2),e1->u.s.info);
+e1->k=VRELOCABLE;e1->u.s.info=e2->u.s.info;
+}
+else{
+luaK_exp2nextreg(fs,e2);
+codearith(fs,OP_CONCAT,e1,e2);
+}
+break;
+}
+case OPR_ADD:codearith(fs,OP_ADD,e1,e2);break;
+case OPR_SUB:codearith(fs,OP_SUB,e1,e2);break;
+case OPR_MUL:codearith(fs,OP_MUL,e1,e2);break;
+case OPR_DIV:codearith(fs,OP_DIV,e1,e2);break;
+case OPR_MOD:codearith(fs,OP_MOD,e1,e2);break;
+case OPR_POW:codearith(fs,OP_POW,e1,e2);break;
+case OPR_EQ:codecomp(fs,OP_EQ,1,e1,e2);break;
+case OPR_NE:codecomp(fs,OP_EQ,0,e1,e2);break;
+case OPR_LT:codecomp(fs,OP_LT,1,e1,e2);break;
+case OPR_LE:codecomp(fs,OP_LE,1,e1,e2);break;
+case OPR_GT:codecomp(fs,OP_LT,0,e1,e2);break;
+case OPR_GE:codecomp(fs,OP_LE,0,e1,e2);break;
+default:;
+}
+}
+static void luaK_fixline(FuncState*fs,int line){
+fs->f->lineinfo[fs->pc-1]=line;
+}
+static int luaK_code(FuncState*fs,Instruction i,int line){
+Proto*f=fs->f;
+dischargejpc(fs);
+luaM_growvector(fs->L,f->code,fs->pc,f->sizecode,Instruction,
+(INT_MAX-2),"code size overflow");
+f->code[fs->pc]=i;
+luaM_growvector(fs->L,f->lineinfo,fs->pc,f->sizelineinfo,int,
+(INT_MAX-2),"code size overflow");
+f->lineinfo[fs->pc]=line;
+return fs->pc++;
+}
+static int luaK_codeABC(FuncState*fs,OpCode o,int a,int b,int c){
+return luaK_code(fs,CREATE_ABC(o,a,b,c),fs->ls->lastline);
+}
+static int luaK_codeABx(FuncState*fs,OpCode o,int a,unsigned int bc){
+return luaK_code(fs,CREATE_ABx(o,a,bc),fs->ls->lastline);
+}
+static void luaK_setlist(FuncState*fs,int base,int nelems,int tostore){
+int c=(nelems-1)/50+1;
+int b=(tostore==(-1))?0:tostore;
+if(c<=((1<<9)-1))
+luaK_codeABC(fs,OP_SETLIST,base,b,c);
+else{
+luaK_codeABC(fs,OP_SETLIST,base,b,0);
+luaK_code(fs,cast(Instruction,c),fs->ls->lastline);
+}
+fs->freereg=base+1;
+}
+#define hasmultret(k)((k)==VCALL||(k)==VVARARG)
+#define getlocvar(fs,i)((fs)->f->locvars[(fs)->actvar[i]])
+#define luaY_checklimit(fs,v,l,m)if((v)>(l))errorlimit(fs,l,m)
+typedef struct BlockCnt{
+struct BlockCnt*previous;
+int breaklist;
+lu_byte nactvar;
+lu_byte upval;
+lu_byte isbreakable;
+}BlockCnt;
+static void chunk(LexState*ls);
+static void expr(LexState*ls,expdesc*v);
+static void anchor_token(LexState*ls){
+if(ls->t.token==TK_NAME||ls->t.token==TK_STRING){
+TString*ts=ls->t.seminfo.ts;
+luaX_newstring(ls,getstr(ts),ts->tsv.len);
+}
+}
+static void error_expected(LexState*ls,int token){
+luaX_syntaxerror(ls,
+luaO_pushfstring(ls->L,LUA_QL("%s")" expected",luaX_token2str(ls,token)));
+}
+static void errorlimit(FuncState*fs,int limit,const char*what){
+const char*msg=(fs->f->linedefined==0)?
+luaO_pushfstring(fs->L,"main function has more than %d %s",limit,what):
+luaO_pushfstring(fs->L,"function at line %d has more than %d %s",
+fs->f->linedefined,limit,what);
+luaX_lexerror(fs->ls,msg,0);
+}
+static int testnext(LexState*ls,int c){
+if(ls->t.token==c){
+luaX_next(ls);
+return 1;
+}
+else return 0;
+}
+static void check(LexState*ls,int c){
+if(ls->t.token!=c)
+error_expected(ls,c);
+}
+static void checknext(LexState*ls,int c){
+check(ls,c);
+luaX_next(ls);
+}
+#define check_condition(ls,c,msg){if(!(c))luaX_syntaxerror(ls,msg);}
+static void check_match(LexState*ls,int what,int who,int where){
+if(!testnext(ls,what)){
+if(where==ls->linenumber)
+error_expected(ls,what);
+else{
+luaX_syntaxerror(ls,luaO_pushfstring(ls->L,
+LUA_QL("%s")" expected (to close "LUA_QL("%s")" at line %d)",
+luaX_token2str(ls,what),luaX_token2str(ls,who),where));
+}
+}
+}
+static TString*str_checkname(LexState*ls){
+TString*ts;
+check(ls,TK_NAME);
+ts=ls->t.seminfo.ts;
+luaX_next(ls);
+return ts;
+}
+static void init_exp(expdesc*e,expkind k,int i){
+e->f=e->t=(-1);
+e->k=k;
+e->u.s.info=i;
+}
+static void codestring(LexState*ls,expdesc*e,TString*s){
+init_exp(e,VK,luaK_stringK(ls->fs,s));
+}
+static void checkname(LexState*ls,expdesc*e){
+codestring(ls,e,str_checkname(ls));
+}
+static int registerlocalvar(LexState*ls,TString*varname){
+FuncState*fs=ls->fs;
+Proto*f=fs->f;
+int oldsize=f->sizelocvars;
+luaM_growvector(ls->L,f->locvars,fs->nlocvars,f->sizelocvars,
+LocVar,SHRT_MAX,"too many local variables");
+while(oldsize<f->sizelocvars)f->locvars[oldsize++].varname=NULL;
+f->locvars[fs->nlocvars].varname=varname;
+luaC_objbarrier(ls->L,f,varname);
+return fs->nlocvars++;
+}
+#define new_localvarliteral(ls,v,n)new_localvar(ls,luaX_newstring(ls,""v,(sizeof(v)/sizeof(char))-1),n)
+static void new_localvar(LexState*ls,TString*name,int n){
+FuncState*fs=ls->fs;
+luaY_checklimit(fs,fs->nactvar+n+1,200,"local variables");
+fs->actvar[fs->nactvar+n]=cast(unsigned short,registerlocalvar(ls,name));
+}
+static void adjustlocalvars(LexState*ls,int nvars){
+FuncState*fs=ls->fs;
+fs->nactvar=cast_byte(fs->nactvar+nvars);
+for(;nvars;nvars--){
+getlocvar(fs,fs->nactvar-nvars).startpc=fs->pc;
+}
+}
+static void removevars(LexState*ls,int tolevel){
+FuncState*fs=ls->fs;
+while(fs->nactvar>tolevel)
+getlocvar(fs,--fs->nactvar).endpc=fs->pc;
+}
+static int indexupvalue(FuncState*fs,TString*name,expdesc*v){
+int i;
+Proto*f=fs->f;
+int oldsize=f->sizeupvalues;
+for(i=0;i<f->nups;i++){
+if(fs->upvalues[i].k==v->k&&fs->upvalues[i].info==v->u.s.info){
+return i;
+}
+}
+luaY_checklimit(fs,f->nups+1,60,"upvalues");
+luaM_growvector(fs->L,f->upvalues,f->nups,f->sizeupvalues,
+TString*,(INT_MAX-2),"");
+while(oldsize<f->sizeupvalues)f->upvalues[oldsize++]=NULL;
+f->upvalues[f->nups]=name;
+luaC_objbarrier(fs->L,f,name);
+fs->upvalues[f->nups].k=cast_byte(v->k);
+fs->upvalues[f->nups].info=cast_byte(v->u.s.info);
+return f->nups++;
+}
+static int searchvar(FuncState*fs,TString*n){
+int i;
+for(i=fs->nactvar-1;i>=0;i--){
+if(n==getlocvar(fs,i).varname)
+return i;
+}
+return-1;
+}
+static void markupval(FuncState*fs,int level){
+BlockCnt*bl=fs->bl;
+while(bl&&bl->nactvar>level)bl=bl->previous;
+if(bl)bl->upval=1;
+}
+static int singlevaraux(FuncState*fs,TString*n,expdesc*var,int base){
+if(fs==NULL){
+init_exp(var,VGLOBAL,((1<<8)-1));
+return VGLOBAL;
+}
+else{
+int v=searchvar(fs,n);
+if(v>=0){
+init_exp(var,VLOCAL,v);
+if(!base)
+markupval(fs,v);
+return VLOCAL;
+}
+else{
+if(singlevaraux(fs->prev,n,var,0)==VGLOBAL)
+return VGLOBAL;
+var->u.s.info=indexupvalue(fs,n,var);
+var->k=VUPVAL;
+return VUPVAL;
+}
+}
+}
+static void singlevar(LexState*ls,expdesc*var){
+TString*varname=str_checkname(ls);
+FuncState*fs=ls->fs;
+if(singlevaraux(fs,varname,var,1)==VGLOBAL)
+var->u.s.info=luaK_stringK(fs,varname);
+}
+static void adjust_assign(LexState*ls,int nvars,int nexps,expdesc*e){
+FuncState*fs=ls->fs;
+int extra=nvars-nexps;
+if(hasmultret(e->k)){
+extra++;
+if(extra<0)extra=0;
+luaK_setreturns(fs,e,extra);
+if(extra>1)luaK_reserveregs(fs,extra-1);
+}
+else{
+if(e->k!=VVOID)luaK_exp2nextreg(fs,e);
+if(extra>0){
+int reg=fs->freereg;
+luaK_reserveregs(fs,extra);
+luaK_nil(fs,reg,extra);
+}
+}
+}
+static void enterlevel(LexState*ls){
+if(++ls->L->nCcalls>200)
+luaX_lexerror(ls,"chunk has too many syntax levels",0);
+}
+#define leavelevel(ls)((ls)->L->nCcalls--)
+static void enterblock(FuncState*fs,BlockCnt*bl,lu_byte isbreakable){
+bl->breaklist=(-1);
+bl->isbreakable=isbreakable;
+bl->nactvar=fs->nactvar;
+bl->upval=0;
+bl->previous=fs->bl;
+fs->bl=bl;
+}
+static void leaveblock(FuncState*fs){
+BlockCnt*bl=fs->bl;
+fs->bl=bl->previous;
+removevars(fs->ls,bl->nactvar);
+if(bl->upval)
+luaK_codeABC(fs,OP_CLOSE,bl->nactvar,0,0);
+fs->freereg=fs->nactvar;
+luaK_patchtohere(fs,bl->breaklist);
+}
+static void pushclosure(LexState*ls,FuncState*func,expdesc*v){
+FuncState*fs=ls->fs;
+Proto*f=fs->f;
+int oldsize=f->sizep;
+int i;
+luaM_growvector(ls->L,f->p,fs->np,f->sizep,Proto*,
+((1<<(9+9))-1),"constant table overflow");
+while(oldsize<f->sizep)f->p[oldsize++]=NULL;
+f->p[fs->np++]=func->f;
+luaC_objbarrier(ls->L,f,func->f);
+init_exp(v,VRELOCABLE,luaK_codeABx(fs,OP_CLOSURE,0,fs->np-1));
+for(i=0;i<func->f->nups;i++){
+OpCode o=(func->upvalues[i].k==VLOCAL)?OP_MOVE:OP_GETUPVAL;
+luaK_codeABC(fs,o,0,func->upvalues[i].info,0);
+}
+}
+static void open_func(LexState*ls,FuncState*fs){
+lua_State*L=ls->L;
+Proto*f=luaF_newproto(L);
+fs->f=f;
+fs->prev=ls->fs;
+fs->ls=ls;
+fs->L=L;
+ls->fs=fs;
+fs->pc=0;
+fs->lasttarget=-1;
+fs->jpc=(-1);
+fs->freereg=0;
+fs->nk=0;
+fs->np=0;
+fs->nlocvars=0;
+fs->nactvar=0;
+fs->bl=NULL;
+f->source=ls->source;
+f->maxstacksize=2;
+fs->h=luaH_new(L,0,0);
+sethvalue(L,L->top,fs->h);
+incr_top(L);
+setptvalue(L,L->top,f);
+incr_top(L);
+}
+static void close_func(LexState*ls){
+lua_State*L=ls->L;
+FuncState*fs=ls->fs;
+Proto*f=fs->f;
+removevars(ls,0);
+luaK_ret(fs,0,0);
+luaM_reallocvector(L,f->code,f->sizecode,fs->pc,Instruction);
+f->sizecode=fs->pc;
+luaM_reallocvector(L,f->lineinfo,f->sizelineinfo,fs->pc,int);
+f->sizelineinfo=fs->pc;
+luaM_reallocvector(L,f->k,f->sizek,fs->nk,TValue);
+f->sizek=fs->nk;
+luaM_reallocvector(L,f->p,f->sizep,fs->np,Proto*);
+f->sizep=fs->np;
+luaM_reallocvector(L,f->locvars,f->sizelocvars,fs->nlocvars,LocVar);
+f->sizelocvars=fs->nlocvars;
+luaM_reallocvector(L,f->upvalues,f->sizeupvalues,f->nups,TString*);
+f->sizeupvalues=f->nups;
+ls->fs=fs->prev;
+if(fs)anchor_token(ls);
+L->top-=2;
+}
+static Proto*luaY_parser(lua_State*L,ZIO*z,Mbuffer*buff,const char*name){
+struct LexState lexstate;
+struct FuncState funcstate;
+lexstate.buff=buff;
+luaX_setinput(L,&lexstate,z,luaS_new(L,name));
+open_func(&lexstate,&funcstate);
+funcstate.f->is_vararg=2;
+luaX_next(&lexstate);
+chunk(&lexstate);
+check(&lexstate,TK_EOS);
+close_func(&lexstate);
+return funcstate.f;
+}
+static void field(LexState*ls,expdesc*v){
+FuncState*fs=ls->fs;
+expdesc key;
+luaK_exp2anyreg(fs,v);
+luaX_next(ls);
+checkname(ls,&key);
+luaK_indexed(fs,v,&key);
+}
+static void yindex(LexState*ls,expdesc*v){
+luaX_next(ls);
+expr(ls,v);
+luaK_exp2val(ls->fs,v);
+checknext(ls,']');
+}
+struct ConsControl{
+expdesc v;
+expdesc*t;
+int nh;
+int na;
+int tostore;
+};
+static void recfield(LexState*ls,struct ConsControl*cc){
+FuncState*fs=ls->fs;
+int reg=ls->fs->freereg;
+expdesc key,val;
+int rkkey;
+if(ls->t.token==TK_NAME){
+luaY_checklimit(fs,cc->nh,(INT_MAX-2),"items in a constructor");
+checkname(ls,&key);
+}
+else
+yindex(ls,&key);
+cc->nh++;
+checknext(ls,'=');
+rkkey=luaK_exp2RK(fs,&key);
+expr(ls,&val);
+luaK_codeABC(fs,OP_SETTABLE,cc->t->u.s.info,rkkey,luaK_exp2RK(fs,&val));
+fs->freereg=reg;
+}
+static void closelistfield(FuncState*fs,struct ConsControl*cc){
+if(cc->v.k==VVOID)return;
+luaK_exp2nextreg(fs,&cc->v);
+cc->v.k=VVOID;
+if(cc->tostore==50){
+luaK_setlist(fs,cc->t->u.s.info,cc->na,cc->tostore);
+cc->tostore=0;
+}
+}
+static void lastlistfield(FuncState*fs,struct ConsControl*cc){
+if(cc->tostore==0)return;
+if(hasmultret(cc->v.k)){
+luaK_setmultret(fs,&cc->v);
+luaK_setlist(fs,cc->t->u.s.info,cc->na,(-1));
+cc->na--;
+}
+else{
+if(cc->v.k!=VVOID)
+luaK_exp2nextreg(fs,&cc->v);
+luaK_setlist(fs,cc->t->u.s.info,cc->na,cc->tostore);
+}
+}
+static void listfield(LexState*ls,struct ConsControl*cc){
+expr(ls,&cc->v);
+luaY_checklimit(ls->fs,cc->na,(INT_MAX-2),"items in a constructor");
+cc->na++;
+cc->tostore++;
+}
+static void constructor(LexState*ls,expdesc*t){
+FuncState*fs=ls->fs;
+int line=ls->linenumber;
+int pc=luaK_codeABC(fs,OP_NEWTABLE,0,0,0);
+struct ConsControl cc;
+cc.na=cc.nh=cc.tostore=0;
+cc.t=t;
+init_exp(t,VRELOCABLE,pc);
+init_exp(&cc.v,VVOID,0);
+luaK_exp2nextreg(ls->fs,t);
+checknext(ls,'{');
+do{
+if(ls->t.token=='}')break;
+closelistfield(fs,&cc);
+switch(ls->t.token){
+case TK_NAME:{
+luaX_lookahead(ls);
+if(ls->lookahead.token!='=')
+listfield(ls,&cc);
+else
+recfield(ls,&cc);
+break;
+}
+case'[':{
+recfield(ls,&cc);
+break;
+}
+default:{
+listfield(ls,&cc);
+break;
+}
+}
+}while(testnext(ls,',')||testnext(ls,';'));
+check_match(ls,'}','{',line);
+lastlistfield(fs,&cc);
+SETARG_B(fs->f->code[pc],luaO_int2fb(cc.na));
+SETARG_C(fs->f->code[pc],luaO_int2fb(cc.nh));
+}
+static void parlist(LexState*ls){
+FuncState*fs=ls->fs;
+Proto*f=fs->f;
+int nparams=0;
+f->is_vararg=0;
+if(ls->t.token!=')'){
+do{
+switch(ls->t.token){
+case TK_NAME:{
+new_localvar(ls,str_checkname(ls),nparams++);
+break;
+}
+case TK_DOTS:{
+luaX_next(ls);
+f->is_vararg|=2;
+break;
+}
+default:luaX_syntaxerror(ls,"<name> or "LUA_QL("...")" expected");
+}
+}while(!f->is_vararg&&testnext(ls,','));
+}
+adjustlocalvars(ls,nparams);
+f->numparams=cast_byte(fs->nactvar-(f->is_vararg&1));
+luaK_reserveregs(fs,fs->nactvar);
+}
+static void body(LexState*ls,expdesc*e,int needself,int line){
+FuncState new_fs;
+open_func(ls,&new_fs);
+new_fs.f->linedefined=line;
+checknext(ls,'(');
+if(needself){
+new_localvarliteral(ls,"self",0);
+adjustlocalvars(ls,1);
+}
+parlist(ls);
+checknext(ls,')');
+chunk(ls);
+new_fs.f->lastlinedefined=ls->linenumber;
+check_match(ls,TK_END,TK_FUNCTION,line);
+close_func(ls);
+pushclosure(ls,&new_fs,e);
+}
+static int explist1(LexState*ls,expdesc*v){
+int n=1;
+expr(ls,v);
+while(testnext(ls,',')){
+luaK_exp2nextreg(ls->fs,v);
+expr(ls,v);
+n++;
+}
+return n;
+}
+static void funcargs(LexState*ls,expdesc*f){
+FuncState*fs=ls->fs;
+expdesc args;
+int base,nparams;
+int line=ls->linenumber;
+switch(ls->t.token){
+case'(':{
+if(line!=ls->lastline)
+luaX_syntaxerror(ls,"ambiguous syntax (function call x new statement)");
+luaX_next(ls);
+if(ls->t.token==')')
+args.k=VVOID;
+else{
+explist1(ls,&args);
+luaK_setmultret(fs,&args);
+}
+check_match(ls,')','(',line);
+break;
+}
+case'{':{
+constructor(ls,&args);
+break;
+}
+case TK_STRING:{
+codestring(ls,&args,ls->t.seminfo.ts);
+luaX_next(ls);
+break;
+}
+default:{
+luaX_syntaxerror(ls,"function arguments expected");
+return;
+}
+}
+base=f->u.s.info;
+if(hasmultret(args.k))
+nparams=(-1);
+else{
+if(args.k!=VVOID)
+luaK_exp2nextreg(fs,&args);
+nparams=fs->freereg-(base+1);
+}
+init_exp(f,VCALL,luaK_codeABC(fs,OP_CALL,base,nparams+1,2));
+luaK_fixline(fs,line);
+fs->freereg=base+1;
+}
+static void prefixexp(LexState*ls,expdesc*v){
+switch(ls->t.token){
+case'(':{
+int line=ls->linenumber;
+luaX_next(ls);
+expr(ls,v);
+check_match(ls,')','(',line);
+luaK_dischargevars(ls->fs,v);
+return;
+}
+case TK_NAME:{
+singlevar(ls,v);
+return;
+}
+default:{
+luaX_syntaxerror(ls,"unexpected symbol");
+return;
+}
+}
+}
+static void primaryexp(LexState*ls,expdesc*v){
+FuncState*fs=ls->fs;
+prefixexp(ls,v);
+for(;;){
+switch(ls->t.token){
+case'.':{
+field(ls,v);
+break;
+}
+case'[':{
+expdesc key;
+luaK_exp2anyreg(fs,v);
+yindex(ls,&key);
+luaK_indexed(fs,v,&key);
+break;
+}
+case':':{
+expdesc key;
+luaX_next(ls);
+checkname(ls,&key);
+luaK_self(fs,v,&key);
+funcargs(ls,v);
+break;
+}
+case'(':case TK_STRING:case'{':{
+luaK_exp2nextreg(fs,v);
+funcargs(ls,v);
+break;
+}
+default:return;
+}
+}
+}
+static void simpleexp(LexState*ls,expdesc*v){
+switch(ls->t.token){
+case TK_NUMBER:{
+init_exp(v,VKNUM,0);
+v->u.nval=ls->t.seminfo.r;
+break;
+}
+case TK_STRING:{
+codestring(ls,v,ls->t.seminfo.ts);
+break;
+}
+case TK_NIL:{
+init_exp(v,VNIL,0);
+break;
+}
+case TK_TRUE:{
+init_exp(v,VTRUE,0);
+break;
+}
+case TK_FALSE:{
+init_exp(v,VFALSE,0);
+break;
+}
+case TK_DOTS:{
+FuncState*fs=ls->fs;
+check_condition(ls,fs->f->is_vararg,
+"cannot use "LUA_QL("...")" outside a vararg function");
+fs->f->is_vararg&=~4;
+init_exp(v,VVARARG,luaK_codeABC(fs,OP_VARARG,0,1,0));
+break;
+}
+case'{':{
+constructor(ls,v);
+return;
+}
+case TK_FUNCTION:{
+luaX_next(ls);
+body(ls,v,0,ls->linenumber);
+return;
+}
+default:{
+primaryexp(ls,v);
+return;
+}
+}
+luaX_next(ls);
+}
+static UnOpr getunopr(int op){
+switch(op){
+case TK_NOT:return OPR_NOT;
+case'-':return OPR_MINUS;
+case'#':return OPR_LEN;
+default:return OPR_NOUNOPR;
+}
+}
+static BinOpr getbinopr(int op){
+switch(op){
+case'+':return OPR_ADD;
+case'-':return OPR_SUB;
+case'*':return OPR_MUL;
+case'/':return OPR_DIV;
+case'%':return OPR_MOD;
+case'^':return OPR_POW;
+case TK_CONCAT:return OPR_CONCAT;
+case TK_NE:return OPR_NE;
+case TK_EQ:return OPR_EQ;
+case'<':return OPR_LT;
+case TK_LE:return OPR_LE;
+case'>':return OPR_GT;
+case TK_GE:return OPR_GE;
+case TK_AND:return OPR_AND;
+case TK_OR:return OPR_OR;
+default:return OPR_NOBINOPR;
+}
+}
+static const struct{
+lu_byte left;
+lu_byte right;
+}priority[]={
+{6,6},{6,6},{7,7},{7,7},{7,7},
+{10,9},{5,4},
+{3,3},{3,3},
+{3,3},{3,3},{3,3},{3,3},
+{2,2},{1,1}
+};
+static BinOpr subexpr(LexState*ls,expdesc*v,unsigned int limit){
+BinOpr op;
+UnOpr uop;
+enterlevel(ls);
+uop=getunopr(ls->t.token);
+if(uop!=OPR_NOUNOPR){
+luaX_next(ls);
+subexpr(ls,v,8);
+luaK_prefix(ls->fs,uop,v);
+}
+else simpleexp(ls,v);
+op=getbinopr(ls->t.token);
+while(op!=OPR_NOBINOPR&&priority[op].left>limit){
+expdesc v2;
+BinOpr nextop;
+luaX_next(ls);
+luaK_infix(ls->fs,op,v);
+nextop=subexpr(ls,&v2,priority[op].right);
+luaK_posfix(ls->fs,op,v,&v2);
+op=nextop;
+}
+leavelevel(ls);
+return op;
+}
+static void expr(LexState*ls,expdesc*v){
+subexpr(ls,v,0);
+}
+static int block_follow(int token){
+switch(token){
+case TK_ELSE:case TK_ELSEIF:case TK_END:
+case TK_UNTIL:case TK_EOS:
+return 1;
+default:return 0;
+}
+}
+static void block(LexState*ls){
+FuncState*fs=ls->fs;
+BlockCnt bl;
+enterblock(fs,&bl,0);
+chunk(ls);
+leaveblock(fs);
+}
+struct LHS_assign{
+struct LHS_assign*prev;
+expdesc v;
+};
+static void check_conflict(LexState*ls,struct LHS_assign*lh,expdesc*v){
+FuncState*fs=ls->fs;
+int extra=fs->freereg;
+int conflict=0;
+for(;lh;lh=lh->prev){
+if(lh->v.k==VINDEXED){
+if(lh->v.u.s.info==v->u.s.info){
+conflict=1;
+lh->v.u.s.info=extra;
+}
+if(lh->v.u.s.aux==v->u.s.info){
+conflict=1;
+lh->v.u.s.aux=extra;
+}
+}
+}
+if(conflict){
+luaK_codeABC(fs,OP_MOVE,fs->freereg,v->u.s.info,0);
+luaK_reserveregs(fs,1);
+}
+}
+static void assignment(LexState*ls,struct LHS_assign*lh,int nvars){
+expdesc e;
+check_condition(ls,VLOCAL<=lh->v.k&&lh->v.k<=VINDEXED,
+"syntax error");
+if(testnext(ls,',')){
+struct LHS_assign nv;
+nv.prev=lh;
+primaryexp(ls,&nv.v);
+if(nv.v.k==VLOCAL)
+check_conflict(ls,lh,&nv.v);
+luaY_checklimit(ls->fs,nvars,200-ls->L->nCcalls,
+"variables in assignment");
+assignment(ls,&nv,nvars+1);
+}
+else{
+int nexps;
+checknext(ls,'=');
+nexps=explist1(ls,&e);
+if(nexps!=nvars){
+adjust_assign(ls,nvars,nexps,&e);
+if(nexps>nvars)
+ls->fs->freereg-=nexps-nvars;
+}
+else{
+luaK_setoneret(ls->fs,&e);
+luaK_storevar(ls->fs,&lh->v,&e);
+return;
+}
+}
+init_exp(&e,VNONRELOC,ls->fs->freereg-1);
+luaK_storevar(ls->fs,&lh->v,&e);
+}
+static int cond(LexState*ls){
+expdesc v;
+expr(ls,&v);
+if(v.k==VNIL)v.k=VFALSE;
+luaK_goiftrue(ls->fs,&v);
+return v.f;
+}
+static void breakstat(LexState*ls){
+FuncState*fs=ls->fs;
+BlockCnt*bl=fs->bl;
+int upval=0;
+while(bl&&!bl->isbreakable){
+upval|=bl->upval;
+bl=bl->previous;
+}
+if(!bl)
+luaX_syntaxerror(ls,"no loop to break");
+if(upval)
+luaK_codeABC(fs,OP_CLOSE,bl->nactvar,0,0);
+luaK_concat(fs,&bl->breaklist,luaK_jump(fs));
+}
+static void whilestat(LexState*ls,int line){
+FuncState*fs=ls->fs;
+int whileinit;
+int condexit;
+BlockCnt bl;
+luaX_next(ls);
+whileinit=luaK_getlabel(fs);
+condexit=cond(ls);
+enterblock(fs,&bl,1);
+checknext(ls,TK_DO);
+block(ls);
+luaK_patchlist(fs,luaK_jump(fs),whileinit);
+check_match(ls,TK_END,TK_WHILE,line);
+leaveblock(fs);
+luaK_patchtohere(fs,condexit);
+}
+static void repeatstat(LexState*ls,int line){
+int condexit;
+FuncState*fs=ls->fs;
+int repeat_init=luaK_getlabel(fs);
+BlockCnt bl1,bl2;
+enterblock(fs,&bl1,1);
+enterblock(fs,&bl2,0);
+luaX_next(ls);
+chunk(ls);
+check_match(ls,TK_UNTIL,TK_REPEAT,line);
+condexit=cond(ls);
+if(!bl2.upval){
+leaveblock(fs);
+luaK_patchlist(ls->fs,condexit,repeat_init);
+}
+else{
+breakstat(ls);
+luaK_patchtohere(ls->fs,condexit);
+leaveblock(fs);
+luaK_patchlist(ls->fs,luaK_jump(fs),repeat_init);
+}
+leaveblock(fs);
+}
+static int exp1(LexState*ls){
+expdesc e;
+int k;
+expr(ls,&e);
+k=e.k;
+luaK_exp2nextreg(ls->fs,&e);
+return k;
+}
+static void forbody(LexState*ls,int base,int line,int nvars,int isnum){
+BlockCnt bl;
+FuncState*fs=ls->fs;
+int prep,endfor;
+adjustlocalvars(ls,3);
+checknext(ls,TK_DO);
+prep=isnum?luaK_codeAsBx(fs,OP_FORPREP,base,(-1)):luaK_jump(fs);
+enterblock(fs,&bl,0);
+adjustlocalvars(ls,nvars);
+luaK_reserveregs(fs,nvars);
+block(ls);
+leaveblock(fs);
+luaK_patchtohere(fs,prep);
+endfor=(isnum)?luaK_codeAsBx(fs,OP_FORLOOP,base,(-1)):
+luaK_codeABC(fs,OP_TFORLOOP,base,0,nvars);
+luaK_fixline(fs,line);
+luaK_patchlist(fs,(isnum?endfor:luaK_jump(fs)),prep+1);
+}
+static void fornum(LexState*ls,TString*varname,int line){
+FuncState*fs=ls->fs;
+int base=fs->freereg;
+new_localvarliteral(ls,"(for index)",0);
+new_localvarliteral(ls,"(for limit)",1);
+new_localvarliteral(ls,"(for step)",2);
+new_localvar(ls,varname,3);
+checknext(ls,'=');
+exp1(ls);
+checknext(ls,',');
+exp1(ls);
+if(testnext(ls,','))
+exp1(ls);
+else{
+luaK_codeABx(fs,OP_LOADK,fs->freereg,luaK_numberK(fs,1));
+luaK_reserveregs(fs,1);
+}
+forbody(ls,base,line,1,1);
+}
+static void forlist(LexState*ls,TString*indexname){
+FuncState*fs=ls->fs;
+expdesc e;
+int nvars=0;
+int line;
+int base=fs->freereg;
+new_localvarliteral(ls,"(for generator)",nvars++);
+new_localvarliteral(ls,"(for state)",nvars++);
+new_localvarliteral(ls,"(for control)",nvars++);
+new_localvar(ls,indexname,nvars++);
+while(testnext(ls,','))
+new_localvar(ls,str_checkname(ls),nvars++);
+checknext(ls,TK_IN);
+line=ls->linenumber;
+adjust_assign(ls,3,explist1(ls,&e),&e);
+luaK_checkstack(fs,3);
+forbody(ls,base,line,nvars-3,0);
+}
+static void forstat(LexState*ls,int line){
+FuncState*fs=ls->fs;
+TString*varname;
+BlockCnt bl;
+enterblock(fs,&bl,1);
+luaX_next(ls);
+varname=str_checkname(ls);
+switch(ls->t.token){
+case'=':fornum(ls,varname,line);break;
+case',':case TK_IN:forlist(ls,varname);break;
+default:luaX_syntaxerror(ls,LUA_QL("=")" or "LUA_QL("in")" expected");
+}
+check_match(ls,TK_END,TK_FOR,line);
+leaveblock(fs);
+}
+static int test_then_block(LexState*ls){
+int condexit;
+luaX_next(ls);
+condexit=cond(ls);
+checknext(ls,TK_THEN);
+block(ls);
+return condexit;
+}
+static void ifstat(LexState*ls,int line){
+FuncState*fs=ls->fs;
+int flist;
+int escapelist=(-1);
+flist=test_then_block(ls);
+while(ls->t.token==TK_ELSEIF){
+luaK_concat(fs,&escapelist,luaK_jump(fs));
+luaK_patchtohere(fs,flist);
+flist=test_then_block(ls);
+}
+if(ls->t.token==TK_ELSE){
+luaK_concat(fs,&escapelist,luaK_jump(fs));
+luaK_patchtohere(fs,flist);
+luaX_next(ls);
+block(ls);
+}
+else
+luaK_concat(fs,&escapelist,flist);
+luaK_patchtohere(fs,escapelist);
+check_match(ls,TK_END,TK_IF,line);
+}
+static void localfunc(LexState*ls){
+expdesc v,b;
+FuncState*fs=ls->fs;
+new_localvar(ls,str_checkname(ls),0);
+init_exp(&v,VLOCAL,fs->freereg);
+luaK_reserveregs(fs,1);
+adjustlocalvars(ls,1);
+body(ls,&b,0,ls->linenumber);
+luaK_storevar(fs,&v,&b);
+getlocvar(fs,fs->nactvar-1).startpc=fs->pc;
+}
+static void localstat(LexState*ls){
+int nvars=0;
+int nexps;
+expdesc e;
+do{
+new_localvar(ls,str_checkname(ls),nvars++);
+}while(testnext(ls,','));
+if(testnext(ls,'='))
+nexps=explist1(ls,&e);
+else{
+e.k=VVOID;
+nexps=0;
+}
+adjust_assign(ls,nvars,nexps,&e);
+adjustlocalvars(ls,nvars);
+}
+static int funcname(LexState*ls,expdesc*v){
+int needself=0;
+singlevar(ls,v);
+while(ls->t.token=='.')
+field(ls,v);
+if(ls->t.token==':'){
+needself=1;
+field(ls,v);
+}
+return needself;
+}
+static void funcstat(LexState*ls,int line){
+int needself;
+expdesc v,b;
+luaX_next(ls);
+needself=funcname(ls,&v);
+body(ls,&b,needself,line);
+luaK_storevar(ls->fs,&v,&b);
+luaK_fixline(ls->fs,line);
+}
+static void exprstat(LexState*ls){
+FuncState*fs=ls->fs;
+struct LHS_assign v;
+primaryexp(ls,&v.v);
+if(v.v.k==VCALL)
+SETARG_C(getcode(fs,&v.v),1);
+else{
+v.prev=NULL;
+assignment(ls,&v,1);
+}
+}
+static void retstat(LexState*ls){
+FuncState*fs=ls->fs;
+expdesc e;
+int first,nret;
+luaX_next(ls);
+if(block_follow(ls->t.token)||ls->t.token==';')
+first=nret=0;
+else{
+nret=explist1(ls,&e);
+if(hasmultret(e.k)){
+luaK_setmultret(fs,&e);
+if(e.k==VCALL&&nret==1){
+SET_OPCODE(getcode(fs,&e),OP_TAILCALL);
+}
+first=fs->nactvar;
+nret=(-1);
+}
+else{
+if(nret==1)
+first=luaK_exp2anyreg(fs,&e);
+else{
+luaK_exp2nextreg(fs,&e);
+first=fs->nactvar;
+}
+}
+}
+luaK_ret(fs,first,nret);
+}
+static int statement(LexState*ls){
+int line=ls->linenumber;
+switch(ls->t.token){
+case TK_IF:{
+ifstat(ls,line);
+return 0;
+}
+case TK_WHILE:{
+whilestat(ls,line);
+return 0;
+}
+case TK_DO:{
+luaX_next(ls);
+block(ls);
+check_match(ls,TK_END,TK_DO,line);
+return 0;
+}
+case TK_FOR:{
+forstat(ls,line);
+return 0;
+}
+case TK_REPEAT:{
+repeatstat(ls,line);
+return 0;
+}
+case TK_FUNCTION:{
+funcstat(ls,line);
+return 0;
+}
+case TK_LOCAL:{
+luaX_next(ls);
+if(testnext(ls,TK_FUNCTION))
+localfunc(ls);
+else
+localstat(ls);
+return 0;
+}
+case TK_RETURN:{
+retstat(ls);
+return 1;
+}
+case TK_BREAK:{
+luaX_next(ls);
+breakstat(ls);
+return 1;
+}
+default:{
+exprstat(ls);
+return 0;
+}
+}
+}
+static void chunk(LexState*ls){
+int islast=0;
+enterlevel(ls);
+while(!islast&&!block_follow(ls->t.token)){
+islast=statement(ls);
+testnext(ls,';');
+ls->fs->freereg=ls->fs->nactvar;
+}
+leavelevel(ls);
+}
+static const TValue*luaV_tonumber(const TValue*obj,TValue*n){
+lua_Number num;
+if(ttisnumber(obj))return obj;
+if(ttisstring(obj)&&luaO_str2d(svalue(obj),&num)){
+setnvalue(n,num);
+return n;
+}
+else
+return NULL;
+}
+static int luaV_tostring(lua_State*L,StkId obj){
+if(!ttisnumber(obj))
+return 0;
+else{
+char s[32];
+lua_Number n=nvalue(obj);
+lua_number2str(s,n);
+setsvalue(L,obj,luaS_new(L,s));
+return 1;
+}
+}
+static void callTMres(lua_State*L,StkId res,const TValue*f,
+const TValue*p1,const TValue*p2){
+ptrdiff_t result=savestack(L,res);
+setobj(L,L->top,f);
+setobj(L,L->top+1,p1);
+setobj(L,L->top+2,p2);
+luaD_checkstack(L,3);
+L->top+=3;
+luaD_call(L,L->top-3,1);
+res=restorestack(L,result);
+L->top--;
+setobj(L,res,L->top);
+}
+static void callTM(lua_State*L,const TValue*f,const TValue*p1,
+const TValue*p2,const TValue*p3){
+setobj(L,L->top,f);
+setobj(L,L->top+1,p1);
+setobj(L,L->top+2,p2);
+setobj(L,L->top+3,p3);
+luaD_checkstack(L,4);
+L->top+=4;
+luaD_call(L,L->top-4,0);
+}
+static void luaV_gettable(lua_State*L,const TValue*t,TValue*key,StkId val){
+int loop;
+for(loop=0;loop<100;loop++){
+const TValue*tm;
+if(ttistable(t)){
+Table*h=hvalue(t);
+const TValue*res=luaH_get(h,key);
+if(!ttisnil(res)||
+(tm=fasttm(L,h->metatable,TM_INDEX))==NULL){
+setobj(L,val,res);
+return;
+}
+}
+else if(ttisnil(tm=luaT_gettmbyobj(L,t,TM_INDEX)))
+luaG_typeerror(L,t,"index");
+if(ttisfunction(tm)){
+callTMres(L,val,tm,t,key);
+return;
+}
+t=tm;
+}
+luaG_runerror(L,"loop in gettable");
+}
+static void luaV_settable(lua_State*L,const TValue*t,TValue*key,StkId val){
+int loop;
+TValue temp;
+for(loop=0;loop<100;loop++){
+const TValue*tm;
+if(ttistable(t)){
+Table*h=hvalue(t);
+TValue*oldval=luaH_set(L,h,key);
+if(!ttisnil(oldval)||
+(tm=fasttm(L,h->metatable,TM_NEWINDEX))==NULL){
+setobj(L,oldval,val);
+h->flags=0;
+luaC_barriert(L,h,val);
+return;
+}
+}
+else if(ttisnil(tm=luaT_gettmbyobj(L,t,TM_NEWINDEX)))
+luaG_typeerror(L,t,"index");
+if(ttisfunction(tm)){
+callTM(L,tm,t,key,val);
+return;
+}
+setobj(L,&temp,tm);
+t=&temp;
+}
+luaG_runerror(L,"loop in settable");
+}
+static int call_binTM(lua_State*L,const TValue*p1,const TValue*p2,
+StkId res,TMS event){
+const TValue*tm=luaT_gettmbyobj(L,p1,event);
+if(ttisnil(tm))
+tm=luaT_gettmbyobj(L,p2,event);
+if(ttisnil(tm))return 0;
+callTMres(L,res,tm,p1,p2);
+return 1;
+}
+static const TValue*get_compTM(lua_State*L,Table*mt1,Table*mt2,
+TMS event){
+const TValue*tm1=fasttm(L,mt1,event);
+const TValue*tm2;
+if(tm1==NULL)return NULL;
+if(mt1==mt2)return tm1;
+tm2=fasttm(L,mt2,event);
+if(tm2==NULL)return NULL;
+if(luaO_rawequalObj(tm1,tm2))
+return tm1;
+return NULL;
+}
+static int call_orderTM(lua_State*L,const TValue*p1,const TValue*p2,
+TMS event){
+const TValue*tm1=luaT_gettmbyobj(L,p1,event);
+const TValue*tm2;
+if(ttisnil(tm1))return-1;
+tm2=luaT_gettmbyobj(L,p2,event);
+if(!luaO_rawequalObj(tm1,tm2))
+return-1;
+callTMres(L,L->top,tm1,p1,p2);
+return!l_isfalse(L->top);
+}
+static int l_strcmp(const TString*ls,const TString*rs){
+const char*l=getstr(ls);
+size_t ll=ls->tsv.len;
+const char*r=getstr(rs);
+size_t lr=rs->tsv.len;
+for(;;){
+int temp=strcoll(l,r);
+if(temp!=0)return temp;
+else{
+size_t len=strlen(l);
+if(len==lr)
+return(len==ll)?0:1;
+else if(len==ll)
+return-1;
+len++;
+l+=len;ll-=len;r+=len;lr-=len;
+}
+}
+}
+static int luaV_lessthan(lua_State*L,const TValue*l,const TValue*r){
+int res;
+if(ttype(l)!=ttype(r))
+return luaG_ordererror(L,l,r);
+else if(ttisnumber(l))
+return luai_numlt(nvalue(l),nvalue(r));
+else if(ttisstring(l))
+return l_strcmp(rawtsvalue(l),rawtsvalue(r))<0;
+else if((res=call_orderTM(L,l,r,TM_LT))!=-1)
+return res;
+return luaG_ordererror(L,l,r);
+}
+static int lessequal(lua_State*L,const TValue*l,const TValue*r){
+int res;
+if(ttype(l)!=ttype(r))
+return luaG_ordererror(L,l,r);
+else if(ttisnumber(l))
+return luai_numle(nvalue(l),nvalue(r));
+else if(ttisstring(l))
+return l_strcmp(rawtsvalue(l),rawtsvalue(r))<=0;
+else if((res=call_orderTM(L,l,r,TM_LE))!=-1)
+return res;
+else if((res=call_orderTM(L,r,l,TM_LT))!=-1)
+return!res;
+return luaG_ordererror(L,l,r);
+}
+static int luaV_equalval(lua_State*L,const TValue*t1,const TValue*t2){
+const TValue*tm;
+switch(ttype(t1)){
+case 0:return 1;
+case 3:return luai_numeq(nvalue(t1),nvalue(t2));
+case 1:return bvalue(t1)==bvalue(t2);
+case 2:return pvalue(t1)==pvalue(t2);
+case 7:{
+if(uvalue(t1)==uvalue(t2))return 1;
+tm=get_compTM(L,uvalue(t1)->metatable,uvalue(t2)->metatable,
+TM_EQ);
+break;
+}
+case 5:{
+if(hvalue(t1)==hvalue(t2))return 1;
+tm=get_compTM(L,hvalue(t1)->metatable,hvalue(t2)->metatable,TM_EQ);
+break;
+}
+default:return gcvalue(t1)==gcvalue(t2);
+}
+if(tm==NULL)return 0;
+callTMres(L,L->top,tm,t1,t2);
+return!l_isfalse(L->top);
+}
+static void luaV_concat(lua_State*L,int total,int last){
+do{
+StkId top=L->base+last+1;
+int n=2;
+if(!(ttisstring(top-2)||ttisnumber(top-2))||!tostring(L,top-1)){
+if(!call_binTM(L,top-2,top-1,top-2,TM_CONCAT))
+luaG_concaterror(L,top-2,top-1);
+}else if(tsvalue(top-1)->len==0)
+(void)tostring(L,top-2);
+else{
+size_t tl=tsvalue(top-1)->len;
+char*buffer;
+int i;
+for(n=1;n<total&&tostring(L,top-n-1);n++){
+size_t l=tsvalue(top-n-1)->len;
+if(l>=((size_t)(~(size_t)0)-2)-tl)luaG_runerror(L,"string length overflow");
+tl+=l;
+}
+buffer=luaZ_openspace(L,&G(L)->buff,tl);
+tl=0;
+for(i=n;i>0;i--){
+size_t l=tsvalue(top-i)->len;
+memcpy(buffer+tl,svalue(top-i),l);
+tl+=l;
+}
+setsvalue(L,top-n,luaS_newlstr(L,buffer,tl));
+}
+total-=n-1;
+last-=n-1;
+}while(total>1);
+}
+static void Arith(lua_State*L,StkId ra,const TValue*rb,
+const TValue*rc,TMS op){
+TValue tempb,tempc;
+const TValue*b,*c;
+if((b=luaV_tonumber(rb,&tempb))!=NULL&&
+(c=luaV_tonumber(rc,&tempc))!=NULL){
+lua_Number nb=nvalue(b),nc=nvalue(c);
+switch(op){
+case TM_ADD:setnvalue(ra,luai_numadd(nb,nc));break;
+case TM_SUB:setnvalue(ra,luai_numsub(nb,nc));break;
+case TM_MUL:setnvalue(ra,luai_nummul(nb,nc));break;
+case TM_DIV:setnvalue(ra,luai_numdiv(nb,nc));break;
+case TM_MOD:setnvalue(ra,luai_nummod(nb,nc));break;
+case TM_POW:setnvalue(ra,luai_numpow(nb,nc));break;
+case TM_UNM:setnvalue(ra,luai_numunm(nb));break;
+default:break;
+}
+}
+else if(!call_binTM(L,rb,rc,ra,op))
+luaG_aritherror(L,rb,rc);
+}
+#define runtime_check(L,c){if(!(c))break;}
+#define RA(i)(base+GETARG_A(i))
+#define RB(i)check_exp(getBMode(GET_OPCODE(i))==OpArgR,base+GETARG_B(i))
+#define RKB(i)check_exp(getBMode(GET_OPCODE(i))==OpArgK,ISK(GETARG_B(i))?k+INDEXK(GETARG_B(i)):base+GETARG_B(i))
+#define RKC(i)check_exp(getCMode(GET_OPCODE(i))==OpArgK,ISK(GETARG_C(i))?k+INDEXK(GETARG_C(i)):base+GETARG_C(i))
+#define KBx(i)check_exp(getBMode(GET_OPCODE(i))==OpArgK,k+GETARG_Bx(i))
+#define dojump(L,pc,i){(pc)+=(i);}
+#define Protect(x){L->savedpc=pc;{x;};base=L->base;}
+#define arith_op(op,tm){TValue*rb=RKB(i);TValue*rc=RKC(i);if(ttisnumber(rb)&&ttisnumber(rc)){lua_Number nb=nvalue(rb),nc=nvalue(rc);setnvalue(ra,op(nb,nc));}else Protect(Arith(L,ra,rb,rc,tm));}
+static void luaV_execute(lua_State*L,int nexeccalls){
+LClosure*cl;
+StkId base;
+TValue*k;
+const Instruction*pc;
+reentry:
+pc=L->savedpc;
+cl=&clvalue(L->ci->func)->l;
+base=L->base;
+k=cl->p->k;
+for(;;){
+const Instruction i=*pc++;
+StkId ra;
+ra=RA(i);
+switch(GET_OPCODE(i)){
+case OP_MOVE:{
+setobj(L,ra,RB(i));
+continue;
+}
+case OP_LOADK:{
+setobj(L,ra,KBx(i));
+continue;
+}
+case OP_LOADBOOL:{
+setbvalue(ra,GETARG_B(i));
+if(GETARG_C(i))pc++;
+continue;
+}
+case OP_LOADNIL:{
+TValue*rb=RB(i);
+do{
+setnilvalue(rb--);
+}while(rb>=ra);
+continue;
+}
+case OP_GETUPVAL:{
+int b=GETARG_B(i);
+setobj(L,ra,cl->upvals[b]->v);
+continue;
+}
+case OP_GETGLOBAL:{
+TValue g;
+TValue*rb=KBx(i);
+sethvalue(L,&g,cl->env);
+Protect(luaV_gettable(L,&g,rb,ra));
+continue;
+}
+case OP_GETTABLE:{
+Protect(luaV_gettable(L,RB(i),RKC(i),ra));
+continue;
+}
+case OP_SETGLOBAL:{
+TValue g;
+sethvalue(L,&g,cl->env);
+Protect(luaV_settable(L,&g,KBx(i),ra));
+continue;
+}
+case OP_SETUPVAL:{
+UpVal*uv=cl->upvals[GETARG_B(i)];
+setobj(L,uv->v,ra);
+luaC_barrier(L,uv,ra);
+continue;
+}
+case OP_SETTABLE:{
+Protect(luaV_settable(L,ra,RKB(i),RKC(i)));
+continue;
+}
+case OP_NEWTABLE:{
+int b=GETARG_B(i);
+int c=GETARG_C(i);
+sethvalue(L,ra,luaH_new(L,luaO_fb2int(b),luaO_fb2int(c)));
+Protect(luaC_checkGC(L));
+continue;
+}
+case OP_SELF:{
+StkId rb=RB(i);
+setobj(L,ra+1,rb);
+Protect(luaV_gettable(L,rb,RKC(i),ra));
+continue;
+}
+case OP_ADD:{
+arith_op(luai_numadd,TM_ADD);
+continue;
+}
+case OP_SUB:{
+arith_op(luai_numsub,TM_SUB);
+continue;
+}
+case OP_MUL:{
+arith_op(luai_nummul,TM_MUL);
+continue;
+}
+case OP_DIV:{
+arith_op(luai_numdiv,TM_DIV);
+continue;
+}
+case OP_MOD:{
+arith_op(luai_nummod,TM_MOD);
+continue;
+}
+case OP_POW:{
+arith_op(luai_numpow,TM_POW);
+continue;
+}
+case OP_UNM:{
+TValue*rb=RB(i);
+if(ttisnumber(rb)){
+lua_Number nb=nvalue(rb);
+setnvalue(ra,luai_numunm(nb));
+}
+else{
+Protect(Arith(L,ra,rb,rb,TM_UNM));
+}
+continue;
+}
+case OP_NOT:{
+int res=l_isfalse(RB(i));
+setbvalue(ra,res);
+continue;
+}
+case OP_LEN:{
+const TValue*rb=RB(i);
+switch(ttype(rb)){
+case 5:{
+setnvalue(ra,cast_num(luaH_getn(hvalue(rb))));
+break;
+}
+case 4:{
+setnvalue(ra,cast_num(tsvalue(rb)->len));
+break;
+}
+default:{
+Protect(
+if(!call_binTM(L,rb,(&luaO_nilobject_),ra,TM_LEN))
+luaG_typeerror(L,rb,"get length of");
+)
+}
+}
+continue;
+}
+case OP_CONCAT:{
+int b=GETARG_B(i);
+int c=GETARG_C(i);
+Protect(luaV_concat(L,c-b+1,c);luaC_checkGC(L));
+setobj(L,RA(i),base+b);
+continue;
+}
+case OP_JMP:{
+dojump(L,pc,GETARG_sBx(i));
+continue;
+}
+case OP_EQ:{
+TValue*rb=RKB(i);
+TValue*rc=RKC(i);
+Protect(
+if(equalobj(L,rb,rc)==GETARG_A(i))
+dojump(L,pc,GETARG_sBx(*pc));
+)
+pc++;
+continue;
+}
+case OP_LT:{
+Protect(
+if(luaV_lessthan(L,RKB(i),RKC(i))==GETARG_A(i))
+dojump(L,pc,GETARG_sBx(*pc));
+)
+pc++;
+continue;
+}
+case OP_LE:{
+Protect(
+if(lessequal(L,RKB(i),RKC(i))==GETARG_A(i))
+dojump(L,pc,GETARG_sBx(*pc));
+)
+pc++;
+continue;
+}
+case OP_TEST:{
+if(l_isfalse(ra)!=GETARG_C(i))
+dojump(L,pc,GETARG_sBx(*pc));
+pc++;
+continue;
+}
+case OP_TESTSET:{
+TValue*rb=RB(i);
+if(l_isfalse(rb)!=GETARG_C(i)){
+setobj(L,ra,rb);
+dojump(L,pc,GETARG_sBx(*pc));
+}
+pc++;
+continue;
+}
+case OP_CALL:{
+int b=GETARG_B(i);
+int nresults=GETARG_C(i)-1;
+if(b!=0)L->top=ra+b;
+L->savedpc=pc;
+switch(luaD_precall(L,ra,nresults)){
+case 0:{
+nexeccalls++;
+goto reentry;
+}
+case 1:{
+if(nresults>=0)L->top=L->ci->top;
+base=L->base;
+continue;
+}
+default:{
+return;
+}
+}
+}
+case OP_TAILCALL:{
+int b=GETARG_B(i);
+if(b!=0)L->top=ra+b;
+L->savedpc=pc;
+switch(luaD_precall(L,ra,(-1))){
+case 0:{
+CallInfo*ci=L->ci-1;
+int aux;
+StkId func=ci->func;
+StkId pfunc=(ci+1)->func;
+if(L->openupval)luaF_close(L,ci->base);
+L->base=ci->base=ci->func+((ci+1)->base-pfunc);
+for(aux=0;pfunc+aux<L->top;aux++)
+setobj(L,func+aux,pfunc+aux);
+ci->top=L->top=func+aux;
+ci->savedpc=L->savedpc;
+ci->tailcalls++;
+L->ci--;
+goto reentry;
+}
+case 1:{
+base=L->base;
+continue;
+}
+default:{
+return;
+}
+}
+}
+case OP_RETURN:{
+int b=GETARG_B(i);
+if(b!=0)L->top=ra+b-1;
+if(L->openupval)luaF_close(L,base);
+L->savedpc=pc;
+b=luaD_poscall(L,ra);
+if(--nexeccalls==0)
+return;
+else{
+if(b)L->top=L->ci->top;
+goto reentry;
+}
+}
+case OP_FORLOOP:{
+lua_Number step=nvalue(ra+2);
+lua_Number idx=luai_numadd(nvalue(ra),step);
+lua_Number limit=nvalue(ra+1);
+if(luai_numlt(0,step)?luai_numle(idx,limit)
+:luai_numle(limit,idx)){
+dojump(L,pc,GETARG_sBx(i));
+setnvalue(ra,idx);
+setnvalue(ra+3,idx);
+}
+continue;
+}
+case OP_FORPREP:{
+const TValue*init=ra;
+const TValue*plimit=ra+1;
+const TValue*pstep=ra+2;
+L->savedpc=pc;
+if(!tonumber(init,ra))
+luaG_runerror(L,LUA_QL("for")" initial value must be a number");
+else if(!tonumber(plimit,ra+1))
+luaG_runerror(L,LUA_QL("for")" limit must be a number");
+else if(!tonumber(pstep,ra+2))
+luaG_runerror(L,LUA_QL("for")" step must be a number");
+setnvalue(ra,luai_numsub(nvalue(ra),nvalue(pstep)));
+dojump(L,pc,GETARG_sBx(i));
+continue;
+}
+case OP_TFORLOOP:{
+StkId cb=ra+3;
+setobj(L,cb+2,ra+2);
+setobj(L,cb+1,ra+1);
+setobj(L,cb,ra);
+L->top=cb+3;
+Protect(luaD_call(L,cb,GETARG_C(i)));
+L->top=L->ci->top;
+cb=RA(i)+3;
+if(!ttisnil(cb)){
+setobj(L,cb-1,cb);
+dojump(L,pc,GETARG_sBx(*pc));
+}
+pc++;
+continue;
+}
+case OP_SETLIST:{
+int n=GETARG_B(i);
+int c=GETARG_C(i);
+int last;
+Table*h;
+if(n==0){
+n=cast_int(L->top-ra)-1;
+L->top=L->ci->top;
+}
+if(c==0)c=cast_int(*pc++);
+runtime_check(L,ttistable(ra));
+h=hvalue(ra);
+last=((c-1)*50)+n;
+if(last>h->sizearray)
+luaH_resizearray(L,h,last);
+for(;n>0;n--){
+TValue*val=ra+n;
+setobj(L,luaH_setnum(L,h,last--),val);
+luaC_barriert(L,h,val);
+}
+continue;
+}
+case OP_CLOSE:{
+luaF_close(L,ra);
+continue;
+}
+case OP_CLOSURE:{
+Proto*p;
+Closure*ncl;
+int nup,j;
+p=cl->p->p[GETARG_Bx(i)];
+nup=p->nups;
+ncl=luaF_newLclosure(L,nup,cl->env);
+ncl->l.p=p;
+for(j=0;j<nup;j++,pc++){
+if(GET_OPCODE(*pc)==OP_GETUPVAL)
+ncl->l.upvals[j]=cl->upvals[GETARG_B(*pc)];
+else{
+ncl->l.upvals[j]=luaF_findupval(L,base+GETARG_B(*pc));
+}
+}
+setclvalue(L,ra,ncl);
+Protect(luaC_checkGC(L));
+continue;
+}
+case OP_VARARG:{
+int b=GETARG_B(i)-1;
+int j;
+CallInfo*ci=L->ci;
+int n=cast_int(ci->base-ci->func)-cl->p->numparams-1;
+if(b==(-1)){
+Protect(luaD_checkstack(L,n));
+ra=RA(i);
+b=n;
+L->top=ra+n;
+}
+for(j=0;j<b;j++){
+if(j<n){
+setobj(L,ra+j,ci->base-n+j);
+}
+else{
+setnilvalue(ra+j);
+}
+}
+continue;
+}
+}
+}
+}
+#define api_checknelems(L,n)luai_apicheck(L,(n)<=(L->top-L->base))
+#define api_checkvalidindex(L,i)luai_apicheck(L,(i)!=(&luaO_nilobject_))
+#define api_incr_top(L){luai_apicheck(L,L->top<L->ci->top);L->top++;}
+static TValue*index2adr(lua_State*L,int idx){
+if(idx>0){
+TValue*o=L->base+(idx-1);
+luai_apicheck(L,idx<=L->ci->top-L->base);
+if(o>=L->top)return cast(TValue*,(&luaO_nilobject_));
+else return o;
+}
+else if(idx>(-10000)){
+luai_apicheck(L,idx!=0&&-idx<=L->top-L->base);
+return L->top+idx;
+}
+else switch(idx){
+case(-10000):return registry(L);
+case(-10001):{
+Closure*func=curr_func(L);
+sethvalue(L,&L->env,func->c.env);
+return&L->env;
+}
+case(-10002):return gt(L);
+default:{
+Closure*func=curr_func(L);
+idx=(-10002)-idx;
+return(idx<=func->c.nupvalues)
+?&func->c.upvalue[idx-1]
+:cast(TValue*,(&luaO_nilobject_));
+}
+}
+}
+static Table*getcurrenv(lua_State*L){
+if(L->ci==L->base_ci)
+return hvalue(gt(L));
+else{
+Closure*func=curr_func(L);
+return func->c.env;
+}
+}
+static int lua_checkstack(lua_State*L,int size){
+int res=1;
+if(size>8000||(L->top-L->base+size)>8000)
+res=0;
+else if(size>0){
+luaD_checkstack(L,size);
+if(L->ci->top<L->top+size)
+L->ci->top=L->top+size;
+}
+return res;
+}
+static lua_CFunction lua_atpanic(lua_State*L,lua_CFunction panicf){
+lua_CFunction old;
+old=G(L)->panic;
+G(L)->panic=panicf;
+return old;
+}
+static int lua_gettop(lua_State*L){
+return cast_int(L->top-L->base);
+}
+static void lua_settop(lua_State*L,int idx){
+if(idx>=0){
+luai_apicheck(L,idx<=L->stack_last-L->base);
+while(L->top<L->base+idx)
+setnilvalue(L->top++);
+L->top=L->base+idx;
+}
+else{
+luai_apicheck(L,-(idx+1)<=(L->top-L->base));
+L->top+=idx+1;
+}
+}
+static void lua_remove(lua_State*L,int idx){
+StkId p;
+p=index2adr(L,idx);
+api_checkvalidindex(L,p);
+while(++p<L->top)setobj(L,p-1,p);
+L->top--;
+}
+static void lua_insert(lua_State*L,int idx){
+StkId p;
+StkId q;
+p=index2adr(L,idx);
+api_checkvalidindex(L,p);
+for(q=L->top;q>p;q--)setobj(L,q,q-1);
+setobj(L,p,L->top);
+}
+static void lua_replace(lua_State*L,int idx){
+StkId o;
+if(idx==(-10001)&&L->ci==L->base_ci)
+luaG_runerror(L,"no calling environment");
+api_checknelems(L,1);
+o=index2adr(L,idx);
+api_checkvalidindex(L,o);
+if(idx==(-10001)){
+Closure*func=curr_func(L);
+luai_apicheck(L,ttistable(L->top-1));
+func->c.env=hvalue(L->top-1);
+luaC_barrier(L,func,L->top-1);
+}
+else{
+setobj(L,o,L->top-1);
+if(idx<(-10002))
+luaC_barrier(L,curr_func(L),L->top-1);
+}
+L->top--;
+}
+static void lua_pushvalue(lua_State*L,int idx){
+setobj(L,L->top,index2adr(L,idx));
+api_incr_top(L);
+}
+static int lua_type(lua_State*L,int idx){
+StkId o=index2adr(L,idx);
+return(o==(&luaO_nilobject_))?(-1):ttype(o);
+}
+static const char*lua_typename(lua_State*L,int t){
+UNUSED(L);
+return(t==(-1))?"no value":luaT_typenames[t];
+}
+static int lua_iscfunction(lua_State*L,int idx){
+StkId o=index2adr(L,idx);
+return iscfunction(o);
+}
+static int lua_isnumber(lua_State*L,int idx){
+TValue n;
+const TValue*o=index2adr(L,idx);
+return tonumber(o,&n);
+}
+static int lua_isstring(lua_State*L,int idx){
+int t=lua_type(L,idx);
+return(t==4||t==3);
+}
+static int lua_rawequal(lua_State*L,int index1,int index2){
+StkId o1=index2adr(L,index1);
+StkId o2=index2adr(L,index2);
+return(o1==(&luaO_nilobject_)||o2==(&luaO_nilobject_))?0
+:luaO_rawequalObj(o1,o2);
+}
+static int lua_lessthan(lua_State*L,int index1,int index2){
+StkId o1,o2;
+int i;
+o1=index2adr(L,index1);
+o2=index2adr(L,index2);
+i=(o1==(&luaO_nilobject_)||o2==(&luaO_nilobject_))?0
+:luaV_lessthan(L,o1,o2);
+return i;
+}
+static lua_Number lua_tonumber(lua_State*L,int idx){
+TValue n;
+const TValue*o=index2adr(L,idx);
+if(tonumber(o,&n))
+return nvalue(o);
+else
+return 0;
+}
+static lua_Integer lua_tointeger(lua_State*L,int idx){
+TValue n;
+const TValue*o=index2adr(L,idx);
+if(tonumber(o,&n)){
+lua_Integer res;
+lua_Number num=nvalue(o);
+lua_number2integer(res,num);
+return res;
+}
+else
+return 0;
+}
+static int lua_toboolean(lua_State*L,int idx){
+const TValue*o=index2adr(L,idx);
+return!l_isfalse(o);
+}
+static const char*lua_tolstring(lua_State*L,int idx,size_t*len){
+StkId o=index2adr(L,idx);
+if(!ttisstring(o)){
+if(!luaV_tostring(L,o)){
+if(len!=NULL)*len=0;
+return NULL;
+}
+luaC_checkGC(L);
+o=index2adr(L,idx);
+}
+if(len!=NULL)*len=tsvalue(o)->len;
+return svalue(o);
+}
+static size_t lua_objlen(lua_State*L,int idx){
+StkId o=index2adr(L,idx);
+switch(ttype(o)){
+case 4:return tsvalue(o)->len;
+case 7:return uvalue(o)->len;
+case 5:return luaH_getn(hvalue(o));
+case 3:{
+size_t l;
+l=(luaV_tostring(L,o)?tsvalue(o)->len:0);
+return l;
+}
+default:return 0;
+}
+}
+static lua_CFunction lua_tocfunction(lua_State*L,int idx){
+StkId o=index2adr(L,idx);
+return(!iscfunction(o))?NULL:clvalue(o)->c.f;
+}
+static void*lua_touserdata(lua_State*L,int idx){
+StkId o=index2adr(L,idx);
+switch(ttype(o)){
+case 7:return(rawuvalue(o)+1);
+case 2:return pvalue(o);
+default:return NULL;
+}
+}
+static void lua_pushnil(lua_State*L){
+setnilvalue(L->top);
+api_incr_top(L);
+}
+static void lua_pushnumber(lua_State*L,lua_Number n){
+setnvalue(L->top,n);
+api_incr_top(L);
+}
+static void lua_pushinteger(lua_State*L,lua_Integer n){
+setnvalue(L->top,cast_num(n));
+api_incr_top(L);
+}
+static void lua_pushlstring(lua_State*L,const char*s,size_t len){
+luaC_checkGC(L);
+setsvalue(L,L->top,luaS_newlstr(L,s,len));
+api_incr_top(L);
+}
+static void lua_pushstring(lua_State*L,const char*s){
+if(s==NULL)
+lua_pushnil(L);
+else
+lua_pushlstring(L,s,strlen(s));
+}
+static const char*lua_pushvfstring(lua_State*L,const char*fmt,
+va_list argp){
+const char*ret;
+luaC_checkGC(L);
+ret=luaO_pushvfstring(L,fmt,argp);
+return ret;
+}
+static const char*lua_pushfstring(lua_State*L,const char*fmt,...){
+const char*ret;
+va_list argp;
+luaC_checkGC(L);
+va_start(argp,fmt);
+ret=luaO_pushvfstring(L,fmt,argp);
+va_end(argp);
+return ret;
+}
+static void lua_pushcclosure(lua_State*L,lua_CFunction fn,int n){
+Closure*cl;
+luaC_checkGC(L);
+api_checknelems(L,n);
+cl=luaF_newCclosure(L,n,getcurrenv(L));
+cl->c.f=fn;
+L->top-=n;
+while(n--)
+setobj(L,&cl->c.upvalue[n],L->top+n);
+setclvalue(L,L->top,cl);
+api_incr_top(L);
+}
+static void lua_pushboolean(lua_State*L,int b){
+setbvalue(L->top,(b!=0));
+api_incr_top(L);
+}
+static int lua_pushthread(lua_State*L){
+setthvalue(L,L->top,L);
+api_incr_top(L);
+return(G(L)->mainthread==L);
+}
+static void lua_gettable(lua_State*L,int idx){
+StkId t;
+t=index2adr(L,idx);
+api_checkvalidindex(L,t);
+luaV_gettable(L,t,L->top-1,L->top-1);
+}
+static void lua_getfield(lua_State*L,int idx,const char*k){
+StkId t;
+TValue key;
+t=index2adr(L,idx);
+api_checkvalidindex(L,t);
+setsvalue(L,&key,luaS_new(L,k));
+luaV_gettable(L,t,&key,L->top);
+api_incr_top(L);
+}
+static void lua_rawget(lua_State*L,int idx){
+StkId t;
+t=index2adr(L,idx);
+luai_apicheck(L,ttistable(t));
+setobj(L,L->top-1,luaH_get(hvalue(t),L->top-1));
+}
+static void lua_rawgeti(lua_State*L,int idx,int n){
+StkId o;
+o=index2adr(L,idx);
+luai_apicheck(L,ttistable(o));
+setobj(L,L->top,luaH_getnum(hvalue(o),n));
+api_incr_top(L);
+}
+static void lua_createtable(lua_State*L,int narray,int nrec){
+luaC_checkGC(L);
+sethvalue(L,L->top,luaH_new(L,narray,nrec));
+api_incr_top(L);
+}
+static int lua_getmetatable(lua_State*L,int objindex){
+const TValue*obj;
+Table*mt=NULL;
+int res;
+obj=index2adr(L,objindex);
+switch(ttype(obj)){
+case 5:
+mt=hvalue(obj)->metatable;
+break;
+case 7:
+mt=uvalue(obj)->metatable;
+break;
+default:
+mt=G(L)->mt[ttype(obj)];
+break;
+}
+if(mt==NULL)
+res=0;
+else{
+sethvalue(L,L->top,mt);
+api_incr_top(L);
+res=1;
+}
+return res;
+}
+static void lua_getfenv(lua_State*L,int idx){
+StkId o;
+o=index2adr(L,idx);
+api_checkvalidindex(L,o);
+switch(ttype(o)){
+case 6:
+sethvalue(L,L->top,clvalue(o)->c.env);
+break;
+case 7:
+sethvalue(L,L->top,uvalue(o)->env);
+break;
+case 8:
+setobj(L,L->top,gt(thvalue(o)));
+break;
+default:
+setnilvalue(L->top);
+break;
+}
+api_incr_top(L);
+}
+static void lua_settable(lua_State*L,int idx){
+StkId t;
+api_checknelems(L,2);
+t=index2adr(L,idx);
+api_checkvalidindex(L,t);
+luaV_settable(L,t,L->top-2,L->top-1);
+L->top-=2;
+}
+static void lua_setfield(lua_State*L,int idx,const char*k){
+StkId t;
+TValue key;
+api_checknelems(L,1);
+t=index2adr(L,idx);
+api_checkvalidindex(L,t);
+setsvalue(L,&key,luaS_new(L,k));
+luaV_settable(L,t,&key,L->top-1);
+L->top--;
+}
+static void lua_rawset(lua_State*L,int idx){
+StkId t;
+api_checknelems(L,2);
+t=index2adr(L,idx);
+luai_apicheck(L,ttistable(t));
+setobj(L,luaH_set(L,hvalue(t),L->top-2),L->top-1);
+luaC_barriert(L,hvalue(t),L->top-1);
+L->top-=2;
+}
+static void lua_rawseti(lua_State*L,int idx,int n){
+StkId o;
+api_checknelems(L,1);
+o=index2adr(L,idx);
+luai_apicheck(L,ttistable(o));
+setobj(L,luaH_setnum(L,hvalue(o),n),L->top-1);
+luaC_barriert(L,hvalue(o),L->top-1);
+L->top--;
+}
+static int lua_setmetatable(lua_State*L,int objindex){
+TValue*obj;
+Table*mt;
+api_checknelems(L,1);
+obj=index2adr(L,objindex);
+api_checkvalidindex(L,obj);
+if(ttisnil(L->top-1))
+mt=NULL;
+else{
+luai_apicheck(L,ttistable(L->top-1));
+mt=hvalue(L->top-1);
+}
+switch(ttype(obj)){
+case 5:{
+hvalue(obj)->metatable=mt;
+if(mt)
+luaC_objbarriert(L,hvalue(obj),mt);
+break;
+}
+case 7:{
+uvalue(obj)->metatable=mt;
+if(mt)
+luaC_objbarrier(L,rawuvalue(obj),mt);
+break;
+}
+default:{
+G(L)->mt[ttype(obj)]=mt;
+break;
+}
+}
+L->top--;
+return 1;
+}
+static int lua_setfenv(lua_State*L,int idx){
+StkId o;
+int res=1;
+api_checknelems(L,1);
+o=index2adr(L,idx);
+api_checkvalidindex(L,o);
+luai_apicheck(L,ttistable(L->top-1));
+switch(ttype(o)){
+case 6:
+clvalue(o)->c.env=hvalue(L->top-1);
+break;
+case 7:
+uvalue(o)->env=hvalue(L->top-1);
+break;
+case 8:
+sethvalue(L,gt(thvalue(o)),hvalue(L->top-1));
+break;
+default:
+res=0;
+break;
+}
+if(res)luaC_objbarrier(L,gcvalue(o),hvalue(L->top-1));
+L->top--;
+return res;
+}
+#define adjustresults(L,nres){if(nres==(-1)&&L->top>=L->ci->top)L->ci->top=L->top;}
+#define checkresults(L,na,nr)luai_apicheck(L,(nr)==(-1)||(L->ci->top-L->top>=(nr)-(na)))
+static void lua_call(lua_State*L,int nargs,int nresults){
+StkId func;
+api_checknelems(L,nargs+1);
+checkresults(L,nargs,nresults);
+func=L->top-(nargs+1);
+luaD_call(L,func,nresults);
+adjustresults(L,nresults);
+}
+struct CallS{
+StkId func;
+int nresults;
+};
+static void f_call(lua_State*L,void*ud){
+struct CallS*c=cast(struct CallS*,ud);
+luaD_call(L,c->func,c->nresults);
+}
+static int lua_pcall(lua_State*L,int nargs,int nresults,int errfunc){
+struct CallS c;
+int status;
+ptrdiff_t func;
+api_checknelems(L,nargs+1);
+checkresults(L,nargs,nresults);
+if(errfunc==0)
+func=0;
+else{
+StkId o=index2adr(L,errfunc);
+api_checkvalidindex(L,o);
+func=savestack(L,o);
+}
+c.func=L->top-(nargs+1);
+c.nresults=nresults;
+status=luaD_pcall(L,f_call,&c,savestack(L,c.func),func);
+adjustresults(L,nresults);
+return status;
+}
+static int lua_load(lua_State*L,lua_Reader reader,void*data,
+const char*chunkname){
+ZIO z;
+int status;
+if(!chunkname)chunkname="?";
+luaZ_init(L,&z,reader,data);
+status=luaD_protectedparser(L,&z,chunkname);
+return status;
+}
+static int lua_error(lua_State*L){
+api_checknelems(L,1);
+luaG_errormsg(L);
+return 0;
+}
+static int lua_next(lua_State*L,int idx){
+StkId t;
+int more;
+t=index2adr(L,idx);
+luai_apicheck(L,ttistable(t));
+more=luaH_next(L,hvalue(t),L->top-1);
+if(more){
+api_incr_top(L);
+}
+else
+L->top-=1;
+return more;
+}
+static void lua_concat(lua_State*L,int n){
+api_checknelems(L,n);
+if(n>=2){
+luaC_checkGC(L);
+luaV_concat(L,n,cast_int(L->top-L->base)-1);
+L->top-=(n-1);
+}
+else if(n==0){
+setsvalue(L,L->top,luaS_newlstr(L,"",0));
+api_incr_top(L);
+}
+}
+static void*lua_newuserdata(lua_State*L,size_t size){
+Udata*u;
+luaC_checkGC(L);
+u=luaS_newudata(L,size,getcurrenv(L));
+setuvalue(L,L->top,u);
+api_incr_top(L);
+return u+1;
+}
+#define luaL_getn(L,i)((int)lua_objlen(L,i))
+#define luaL_setn(L,i,j)((void)0)
+typedef struct luaL_Reg{
+const char*name;
+lua_CFunction func;
+}luaL_Reg;
+static void luaI_openlib(lua_State*L,const char*libname,
+const luaL_Reg*l,int nup);
+static int luaL_argerror(lua_State*L,int numarg,const char*extramsg);
+static const char* luaL_checklstring(lua_State*L,int numArg,
+size_t*l);
+static const char* luaL_optlstring(lua_State*L,int numArg,
+const char*def,size_t*l);
+static lua_Integer luaL_checkinteger(lua_State*L,int numArg);
+static lua_Integer luaL_optinteger(lua_State*L,int nArg,
+lua_Integer def);
+static int luaL_error(lua_State*L,const char*fmt,...);
+static const char* luaL_findtable(lua_State*L,int idx,
+const char*fname,int szhint);
+#define luaL_argcheck(L,cond,numarg,extramsg)((void)((cond)||luaL_argerror(L,(numarg),(extramsg))))
+#define luaL_checkstring(L,n)(luaL_checklstring(L,(n),NULL))
+#define luaL_optstring(L,n,d)(luaL_optlstring(L,(n),(d),NULL))
+#define luaL_checkint(L,n)((int)luaL_checkinteger(L,(n)))
+#define luaL_optint(L,n,d)((int)luaL_optinteger(L,(n),(d)))
+#define luaL_typename(L,i)lua_typename(L,lua_type(L,(i)))
+#define luaL_getmetatable(L,n)(lua_getfield(L,(-10000),(n)))
+#define luaL_opt(L,f,n,d)(lua_isnoneornil(L,(n))?(d):f(L,(n)))
+typedef struct luaL_Buffer{
+char*p;
+int lvl;
+lua_State*L;
+char buffer[BUFSIZ];
+}luaL_Buffer;
+#define luaL_addchar(B,c)((void)((B)->p<((B)->buffer+BUFSIZ)||luaL_prepbuffer(B)),(*(B)->p++=(char)(c)))
+#define luaL_addsize(B,n)((B)->p+=(n))
+static char* luaL_prepbuffer(luaL_Buffer*B);
+static int luaL_argerror(lua_State*L,int narg,const char*extramsg){
+lua_Debug ar;
+if(!lua_getstack(L,0,&ar))
+return luaL_error(L,"bad argument #%d (%s)",narg,extramsg);
+lua_getinfo(L,"n",&ar);
+if(strcmp(ar.namewhat,"method")==0){
+narg--;
+if(narg==0)
+return luaL_error(L,"calling "LUA_QL("%s")" on bad self (%s)",
+ar.name,extramsg);
+}
+if(ar.name==NULL)
+ar.name="?";
+return luaL_error(L,"bad argument #%d to "LUA_QL("%s")" (%s)",
+narg,ar.name,extramsg);
+}
+static int luaL_typerror(lua_State*L,int narg,const char*tname){
+const char*msg=lua_pushfstring(L,"%s expected, got %s",
+tname,luaL_typename(L,narg));
+return luaL_argerror(L,narg,msg);
+}
+static void tag_error(lua_State*L,int narg,int tag){
+luaL_typerror(L,narg,lua_typename(L,tag));
+}
+static void luaL_where(lua_State*L,int level){
+lua_Debug ar;
+if(lua_getstack(L,level,&ar)){
+lua_getinfo(L,"Sl",&ar);
+if(ar.currentline>0){
+lua_pushfstring(L,"%s:%d: ",ar.short_src,ar.currentline);
+return;
+}
+}
+lua_pushliteral(L,"");
+}
+static int luaL_error(lua_State*L,const char*fmt,...){
+va_list argp;
+va_start(argp,fmt);
+luaL_where(L,1);
+lua_pushvfstring(L,fmt,argp);
+va_end(argp);
+lua_concat(L,2);
+return lua_error(L);
+}
+static int luaL_newmetatable(lua_State*L,const char*tname){
+lua_getfield(L,(-10000),tname);
+if(!lua_isnil(L,-1))
+return 0;
+lua_pop(L,1);
+lua_newtable(L);
+lua_pushvalue(L,-1);
+lua_setfield(L,(-10000),tname);
+return 1;
+}
+static void*luaL_checkudata(lua_State*L,int ud,const char*tname){
+void*p=lua_touserdata(L,ud);
+if(p!=NULL){
+if(lua_getmetatable(L,ud)){
+lua_getfield(L,(-10000),tname);
+if(lua_rawequal(L,-1,-2)){
+lua_pop(L,2);
+return p;
+}
+}
+}
+luaL_typerror(L,ud,tname);
+return NULL;
+}
+static void luaL_checkstack(lua_State*L,int space,const char*mes){
+if(!lua_checkstack(L,space))
+luaL_error(L,"stack overflow (%s)",mes);
+}
+static void luaL_checktype(lua_State*L,int narg,int t){
+if(lua_type(L,narg)!=t)
+tag_error(L,narg,t);
+}
+static void luaL_checkany(lua_State*L,int narg){
+if(lua_type(L,narg)==(-1))
+luaL_argerror(L,narg,"value expected");
+}
+static const char*luaL_checklstring(lua_State*L,int narg,size_t*len){
+const char*s=lua_tolstring(L,narg,len);
+if(!s)tag_error(L,narg,4);
+return s;
+}
+static const char*luaL_optlstring(lua_State*L,int narg,
+const char*def,size_t*len){
+if(lua_isnoneornil(L,narg)){
+if(len)
+*len=(def?strlen(def):0);
+return def;
+}
+else return luaL_checklstring(L,narg,len);
+}
+static lua_Number luaL_checknumber(lua_State*L,int narg){
+lua_Number d=lua_tonumber(L,narg);
+if(d==0&&!lua_isnumber(L,narg))
+tag_error(L,narg,3);
+return d;
+}
+static lua_Integer luaL_checkinteger(lua_State*L,int narg){
+lua_Integer d=lua_tointeger(L,narg);
+if(d==0&&!lua_isnumber(L,narg))
+tag_error(L,narg,3);
+return d;
+}
+static lua_Integer luaL_optinteger(lua_State*L,int narg,
+lua_Integer def){
+return luaL_opt(L,luaL_checkinteger,narg,def);
+}
+static int luaL_getmetafield(lua_State*L,int obj,const char*event){
+if(!lua_getmetatable(L,obj))
+return 0;
+lua_pushstring(L,event);
+lua_rawget(L,-2);
+if(lua_isnil(L,-1)){
+lua_pop(L,2);
+return 0;
+}
+else{
+lua_remove(L,-2);
+return 1;
+}
+}
+static void luaL_register(lua_State*L,const char*libname,
+const luaL_Reg*l){
+luaI_openlib(L,libname,l,0);
+}
+static int libsize(const luaL_Reg*l){
+int size=0;
+for(;l->name;l++)size++;
+return size;
+}
+static void luaI_openlib(lua_State*L,const char*libname,
+const luaL_Reg*l,int nup){
+if(libname){
+int size=libsize(l);
+luaL_findtable(L,(-10000),"_LOADED",1);
+lua_getfield(L,-1,libname);
+if(!lua_istable(L,-1)){
+lua_pop(L,1);
+if(luaL_findtable(L,(-10002),libname,size)!=NULL)
+luaL_error(L,"name conflict for module "LUA_QL("%s"),libname);
+lua_pushvalue(L,-1);
+lua_setfield(L,-3,libname);
+}
+lua_remove(L,-2);
+lua_insert(L,-(nup+1));
+}
+for(;l->name;l++){
+int i;
+for(i=0;i<nup;i++)
+lua_pushvalue(L,-nup);
+lua_pushcclosure(L,l->func,nup);
+lua_setfield(L,-(nup+2),l->name);
+}
+lua_pop(L,nup);
+}
+static const char*luaL_findtable(lua_State*L,int idx,
+const char*fname,int szhint){
+const char*e;
+lua_pushvalue(L,idx);
+do{
+e=strchr(fname,'.');
+if(e==NULL)e=fname+strlen(fname);
+lua_pushlstring(L,fname,e-fname);
+lua_rawget(L,-2);
+if(lua_isnil(L,-1)){
+lua_pop(L,1);
+lua_createtable(L,0,(*e=='.'?1:szhint));
+lua_pushlstring(L,fname,e-fname);
+lua_pushvalue(L,-2);
+lua_settable(L,-4);
+}
+else if(!lua_istable(L,-1)){
+lua_pop(L,2);
+return fname;
+}
+lua_remove(L,-2);
+fname=e+1;
+}while(*e=='.');
+return NULL;
+}
+#define bufflen(B)((B)->p-(B)->buffer)
+#define bufffree(B)((size_t)(BUFSIZ-bufflen(B)))
+static int emptybuffer(luaL_Buffer*B){
+size_t l=bufflen(B);
+if(l==0)return 0;
+else{
+lua_pushlstring(B->L,B->buffer,l);
+B->p=B->buffer;
+B->lvl++;
+return 1;
+}
+}
+static void adjuststack(luaL_Buffer*B){
+if(B->lvl>1){
+lua_State*L=B->L;
+int toget=1;
+size_t toplen=lua_strlen(L,-1);
+do{
+size_t l=lua_strlen(L,-(toget+1));
+if(B->lvl-toget+1>=(20/2)||toplen>l){
+toplen+=l;
+toget++;
+}
+else break;
+}while(toget<B->lvl);
+lua_concat(L,toget);
+B->lvl=B->lvl-toget+1;
+}
+}
+static char*luaL_prepbuffer(luaL_Buffer*B){
+if(emptybuffer(B))
+adjuststack(B);
+return B->buffer;
+}
+static void luaL_addlstring(luaL_Buffer*B,const char*s,size_t l){
+while(l--)
+luaL_addchar(B,*s++);
+}
+static void luaL_pushresult(luaL_Buffer*B){
+emptybuffer(B);
+lua_concat(B->L,B->lvl);
+B->lvl=1;
+}
+static void luaL_addvalue(luaL_Buffer*B){
+lua_State*L=B->L;
+size_t vl;
+const char*s=lua_tolstring(L,-1,&vl);
+if(vl<=bufffree(B)){
+memcpy(B->p,s,vl);
+B->p+=vl;
+lua_pop(L,1);
+}
+else{
+if(emptybuffer(B))
+lua_insert(L,-2);
+B->lvl++;
+adjuststack(B);
+}
+}
+static void luaL_buffinit(lua_State*L,luaL_Buffer*B){
+B->L=L;
+B->p=B->buffer;
+B->lvl=0;
+}
+typedef struct LoadF{
+int extraline;
+FILE*f;
+char buff[BUFSIZ];
+}LoadF;
+static const char*getF(lua_State*L,void*ud,size_t*size){
+LoadF*lf=(LoadF*)ud;
+(void)L;
+if(lf->extraline){
+lf->extraline=0;
+*size=1;
+return"\n";
+}
+if(feof(lf->f))return NULL;
+*size=fread(lf->buff,1,sizeof(lf->buff),lf->f);
+return(*size>0)?lf->buff:NULL;
+}
+static int errfile(lua_State*L,const char*what,int fnameindex){
+const char*serr=strerror(errno);
+const char*filename=lua_tostring(L,fnameindex)+1;
+lua_pushfstring(L,"cannot %s %s: %s",what,filename,serr);
+lua_remove(L,fnameindex);
+return(5+1);
+}
+static int luaL_loadfile(lua_State*L,const char*filename){
+LoadF lf;
+int status,readstatus;
+int c;
+int fnameindex=lua_gettop(L)+1;
+lf.extraline=0;
+if(filename==NULL){
+lua_pushliteral(L,"=stdin");
+lf.f=stdin;
+}
+else{
+lua_pushfstring(L,"@%s",filename);
+lf.f=fopen(filename,"r");
+if(lf.f==NULL)return errfile(L,"open",fnameindex);
+}
+c=getc(lf.f);
+if(c=='#'){
+lf.extraline=1;
+while((c=getc(lf.f))!=EOF&&c!='\n');
+if(c=='\n')c=getc(lf.f);
+}
+if(c=="\033Lua"[0]&&filename){
+lf.f=freopen(filename,"rb",lf.f);
+if(lf.f==NULL)return errfile(L,"reopen",fnameindex);
+while((c=getc(lf.f))!=EOF&&c!="\033Lua"[0]);
+lf.extraline=0;
+}
+ungetc(c,lf.f);
+status=lua_load(L,getF,&lf,lua_tostring(L,-1));
+readstatus=ferror(lf.f);
+if(filename)fclose(lf.f);
+if(readstatus){
+lua_settop(L,fnameindex);
+return errfile(L,"read",fnameindex);
+}
+lua_remove(L,fnameindex);
+return status;
+}
+typedef struct LoadS{
+const char*s;
+size_t size;
+}LoadS;
+static const char*getS(lua_State*L,void*ud,size_t*size){
+LoadS*ls=(LoadS*)ud;
+(void)L;
+if(ls->size==0)return NULL;
+*size=ls->size;
+ls->size=0;
+return ls->s;
+}
+static int luaL_loadbuffer(lua_State*L,const char*buff,size_t size,
+const char*name){
+LoadS ls;
+ls.s=buff;
+ls.size=size;
+return lua_load(L,getS,&ls,name);
+}
+static void*l_alloc(void*ud,void*ptr,size_t osize,size_t nsize){
+(void)ud;
+(void)osize;
+if(nsize==0){
+free(ptr);
+return NULL;
+}
+else
+return realloc(ptr,nsize);
+}
+static int panic(lua_State*L){
+(void)L;
+fprintf(stderr,"PANIC: unprotected error in call to Lua API (%s)\n",
+lua_tostring(L,-1));
+return 0;
+}
+static lua_State*luaL_newstate(void){
+lua_State*L=lua_newstate(l_alloc,NULL);
+if(L)lua_atpanic(L,&panic);
+return L;
+}
+static int luaB_tonumber(lua_State*L){
+int base=luaL_optint(L,2,10);
+if(base==10){
+luaL_checkany(L,1);
+if(lua_isnumber(L,1)){
+lua_pushnumber(L,lua_tonumber(L,1));
+return 1;
+}
+}
+else{
+const char*s1=luaL_checkstring(L,1);
+char*s2;
+unsigned long n;
+luaL_argcheck(L,2<=base&&base<=36,2,"base out of range");
+n=strtoul(s1,&s2,base);
+if(s1!=s2){
+while(isspace((unsigned char)(*s2)))s2++;
+if(*s2=='\0'){
+lua_pushnumber(L,(lua_Number)n);
+return 1;
+}
+}
+}
+lua_pushnil(L);
+return 1;
+}
+static int luaB_error(lua_State*L){
+int level=luaL_optint(L,2,1);
+lua_settop(L,1);
+if(lua_isstring(L,1)&&level>0){
+luaL_where(L,level);
+lua_pushvalue(L,1);
+lua_concat(L,2);
+}
+return lua_error(L);
+}
+static int luaB_setmetatable(lua_State*L){
+int t=lua_type(L,2);
+luaL_checktype(L,1,5);
+luaL_argcheck(L,t==0||t==5,2,
+"nil or table expected");
+if(luaL_getmetafield(L,1,"__metatable"))
+luaL_error(L,"cannot change a protected metatable");
+lua_settop(L,2);
+lua_setmetatable(L,1);
+return 1;
+}
+static void getfunc(lua_State*L,int opt){
+if(lua_isfunction(L,1))lua_pushvalue(L,1);
+else{
+lua_Debug ar;
+int level=opt?luaL_optint(L,1,1):luaL_checkint(L,1);
+luaL_argcheck(L,level>=0,1,"level must be non-negative");
+if(lua_getstack(L,level,&ar)==0)
+luaL_argerror(L,1,"invalid level");
+lua_getinfo(L,"f",&ar);
+if(lua_isnil(L,-1))
+luaL_error(L,"no function environment for tail call at level %d",
+level);
+}
+}
+static int luaB_setfenv(lua_State*L){
+luaL_checktype(L,2,5);
+getfunc(L,0);
+lua_pushvalue(L,2);
+if(lua_isnumber(L,1)&&lua_tonumber(L,1)==0){
+lua_pushthread(L);
+lua_insert(L,-2);
+lua_setfenv(L,-2);
+return 0;
+}
+else if(lua_iscfunction(L,-2)||lua_setfenv(L,-2)==0)
+luaL_error(L,
+LUA_QL("setfenv")" cannot change environment of given object");
+return 1;
+}
+static int luaB_rawget(lua_State*L){
+luaL_checktype(L,1,5);
+luaL_checkany(L,2);
+lua_settop(L,2);
+lua_rawget(L,1);
+return 1;
+}
+static int luaB_type(lua_State*L){
+luaL_checkany(L,1);
+lua_pushstring(L,luaL_typename(L,1));
+return 1;
+}
+static int luaB_next(lua_State*L){
+luaL_checktype(L,1,5);
+lua_settop(L,2);
+if(lua_next(L,1))
+return 2;
+else{
+lua_pushnil(L);
+return 1;
+}
+}
+static int luaB_pairs(lua_State*L){
+luaL_checktype(L,1,5);
+lua_pushvalue(L,lua_upvalueindex(1));
+lua_pushvalue(L,1);
+lua_pushnil(L);
+return 3;
+}
+static int ipairsaux(lua_State*L){
+int i=luaL_checkint(L,2);
+luaL_checktype(L,1,5);
+i++;
+lua_pushinteger(L,i);
+lua_rawgeti(L,1,i);
+return(lua_isnil(L,-1))?0:2;
+}
+static int luaB_ipairs(lua_State*L){
+luaL_checktype(L,1,5);
+lua_pushvalue(L,lua_upvalueindex(1));
+lua_pushvalue(L,1);
+lua_pushinteger(L,0);
+return 3;
+}
+static int load_aux(lua_State*L,int status){
+if(status==0)
+return 1;
+else{
+lua_pushnil(L);
+lua_insert(L,-2);
+return 2;
+}
+}
+static int luaB_loadstring(lua_State*L){
+size_t l;
+const char*s=luaL_checklstring(L,1,&l);
+const char*chunkname=luaL_optstring(L,2,s);
+return load_aux(L,luaL_loadbuffer(L,s,l,chunkname));
+}
+static int luaB_loadfile(lua_State*L){
+const char*fname=luaL_optstring(L,1,NULL);
+return load_aux(L,luaL_loadfile(L,fname));
+}
+static int luaB_assert(lua_State*L){
+luaL_checkany(L,1);
+if(!lua_toboolean(L,1))
+return luaL_error(L,"%s",luaL_optstring(L,2,"assertion failed!"));
+return lua_gettop(L);
+}
+static int luaB_unpack(lua_State*L){
+int i,e,n;
+luaL_checktype(L,1,5);
+i=luaL_optint(L,2,1);
+e=luaL_opt(L,luaL_checkint,3,luaL_getn(L,1));
+if(i>e)return 0;
+n=e-i+1;
+if(n<=0||!lua_checkstack(L,n))
+return luaL_error(L,"too many results to unpack");
+lua_rawgeti(L,1,i);
+while(i++<e)
+lua_rawgeti(L,1,i);
+return n;
+}
+static int luaB_pcall(lua_State*L){
+int status;
+luaL_checkany(L,1);
+status=lua_pcall(L,lua_gettop(L)-1,(-1),0);
+lua_pushboolean(L,(status==0));
+lua_insert(L,1);
+return lua_gettop(L);
+}
+static int luaB_newproxy(lua_State*L){
+lua_settop(L,1);
+lua_newuserdata(L,0);
+if(lua_toboolean(L,1)==0)
+return 1;
+else if(lua_isboolean(L,1)){
+lua_newtable(L);
+lua_pushvalue(L,-1);
+lua_pushboolean(L,1);
+lua_rawset(L,lua_upvalueindex(1));
+}
+else{
+int validproxy=0;
+if(lua_getmetatable(L,1)){
+lua_rawget(L,lua_upvalueindex(1));
+validproxy=lua_toboolean(L,-1);
+lua_pop(L,1);
+}
+luaL_argcheck(L,validproxy,1,"boolean or proxy expected");
+lua_getmetatable(L,1);
+}
+lua_setmetatable(L,2);
+return 1;
+}
+static const luaL_Reg base_funcs[]={
+{"assert",luaB_assert},
+{"error",luaB_error},
+{"loadfile",luaB_loadfile},
+{"loadstring",luaB_loadstring},
+{"next",luaB_next},
+{"pcall",luaB_pcall},
+{"rawget",luaB_rawget},
+{"setfenv",luaB_setfenv},
+{"setmetatable",luaB_setmetatable},
+{"tonumber",luaB_tonumber},
+{"type",luaB_type},
+{"unpack",luaB_unpack},
+{NULL,NULL}
+};
+static void auxopen(lua_State*L,const char*name,
+lua_CFunction f,lua_CFunction u){
+lua_pushcfunction(L,u);
+lua_pushcclosure(L,f,1);
+lua_setfield(L,-2,name);
+}
+static void base_open(lua_State*L){
+lua_pushvalue(L,(-10002));
+lua_setglobal(L,"_G");
+luaL_register(L,"_G",base_funcs);
+lua_pushliteral(L,"Lua 5.1");
+lua_setglobal(L,"_VERSION");
+auxopen(L,"ipairs",luaB_ipairs,ipairsaux);
+auxopen(L,"pairs",luaB_pairs,luaB_next);
+lua_createtable(L,0,1);
+lua_pushvalue(L,-1);
+lua_setmetatable(L,-2);
+lua_pushliteral(L,"kv");
+lua_setfield(L,-2,"__mode");
+lua_pushcclosure(L,luaB_newproxy,1);
+lua_setglobal(L,"newproxy");
+}
+static int luaopen_base(lua_State*L){
+base_open(L);
+return 1;
+}
+#define aux_getn(L,n)(luaL_checktype(L,n,5),luaL_getn(L,n))
+static int tinsert(lua_State*L){
+int e=aux_getn(L,1)+1;
+int pos;
+switch(lua_gettop(L)){
+case 2:{
+pos=e;
+break;
+}
+case 3:{
+int i;
+pos=luaL_checkint(L,2);
+if(pos>e)e=pos;
+for(i=e;i>pos;i--){
+lua_rawgeti(L,1,i-1);
+lua_rawseti(L,1,i);
+}
+break;
+}
+default:{
+return luaL_error(L,"wrong number of arguments to "LUA_QL("insert"));
+}
+}
+luaL_setn(L,1,e);
+lua_rawseti(L,1,pos);
+return 0;
+}
+static int tremove(lua_State*L){
+int e=aux_getn(L,1);
+int pos=luaL_optint(L,2,e);
+if(!(1<=pos&&pos<=e))
+return 0;
+luaL_setn(L,1,e-1);
+lua_rawgeti(L,1,pos);
+for(;pos<e;pos++){
+lua_rawgeti(L,1,pos+1);
+lua_rawseti(L,1,pos);
+}
+lua_pushnil(L);
+lua_rawseti(L,1,e);
+return 1;
+}
+static void addfield(lua_State*L,luaL_Buffer*b,int i){
+lua_rawgeti(L,1,i);
+if(!lua_isstring(L,-1))
+luaL_error(L,"invalid value (%s) at index %d in table for "
+LUA_QL("concat"),luaL_typename(L,-1),i);
+luaL_addvalue(b);
+}
+static int tconcat(lua_State*L){
+luaL_Buffer b;
+size_t lsep;
+int i,last;
+const char*sep=luaL_optlstring(L,2,"",&lsep);
+luaL_checktype(L,1,5);
+i=luaL_optint(L,3,1);
+last=luaL_opt(L,luaL_checkint,4,luaL_getn(L,1));
+luaL_buffinit(L,&b);
+for(;i<last;i++){
+addfield(L,&b,i);
+luaL_addlstring(&b,sep,lsep);
+}
+if(i==last)
+addfield(L,&b,i);
+luaL_pushresult(&b);
+return 1;
+}
+static void set2(lua_State*L,int i,int j){
+lua_rawseti(L,1,i);
+lua_rawseti(L,1,j);
+}
+static int sort_comp(lua_State*L,int a,int b){
+if(!lua_isnil(L,2)){
+int res;
+lua_pushvalue(L,2);
+lua_pushvalue(L,a-1);
+lua_pushvalue(L,b-2);
+lua_call(L,2,1);
+res=lua_toboolean(L,-1);
+lua_pop(L,1);
+return res;
+}
+else
+return lua_lessthan(L,a,b);
+}
+static void auxsort(lua_State*L,int l,int u){
+while(l<u){
+int i,j;
+lua_rawgeti(L,1,l);
+lua_rawgeti(L,1,u);
+if(sort_comp(L,-1,-2))
+set2(L,l,u);
+else
+lua_pop(L,2);
+if(u-l==1)break;
+i=(l+u)/2;
+lua_rawgeti(L,1,i);
+lua_rawgeti(L,1,l);
+if(sort_comp(L,-2,-1))
+set2(L,i,l);
+else{
+lua_pop(L,1);
+lua_rawgeti(L,1,u);
+if(sort_comp(L,-1,-2))
+set2(L,i,u);
+else
+lua_pop(L,2);
+}
+if(u-l==2)break;
+lua_rawgeti(L,1,i);
+lua_pushvalue(L,-1);
+lua_rawgeti(L,1,u-1);
+set2(L,i,u-1);
+i=l;j=u-1;
+for(;;){
+while(lua_rawgeti(L,1,++i),sort_comp(L,-1,-2)){
+if(i>u)luaL_error(L,"invalid order function for sorting");
+lua_pop(L,1);
+}
+while(lua_rawgeti(L,1,--j),sort_comp(L,-3,-1)){
+if(j<l)luaL_error(L,"invalid order function for sorting");
+lua_pop(L,1);
+}
+if(j<i){
+lua_pop(L,3);
+break;
+}
+set2(L,i,j);
+}
+lua_rawgeti(L,1,u-1);
+lua_rawgeti(L,1,i);
+set2(L,u-1,i);
+if(i-l<u-i){
+j=l;i=i-1;l=i+2;
+}
+else{
+j=i+1;i=u;u=j-2;
+}
+auxsort(L,j,i);
+}
+}
+static int sort(lua_State*L){
+int n=aux_getn(L,1);
+luaL_checkstack(L,40,"");
+if(!lua_isnoneornil(L,2))
+luaL_checktype(L,2,6);
+lua_settop(L,2);
+auxsort(L,1,n);
+return 0;
+}
+static const luaL_Reg tab_funcs[]={
+{"concat",tconcat},
+{"insert",tinsert},
+{"remove",tremove},
+{"sort",sort},
+{NULL,NULL}
+};
+static int luaopen_table(lua_State*L){
+luaL_register(L,"table",tab_funcs);
+return 1;
+}
+static const char*const fnames[]={"input","output"};
+static int pushresult(lua_State*L,int i,const char*filename){
+int en=errno;
+if(i){
+lua_pushboolean(L,1);
+return 1;
+}
+else{
+lua_pushnil(L);
+if(filename)
+lua_pushfstring(L,"%s: %s",filename,strerror(en));
+else
+lua_pushfstring(L,"%s",strerror(en));
+lua_pushinteger(L,en);
+return 3;
+}
+}
+static void fileerror(lua_State*L,int arg,const char*filename){
+lua_pushfstring(L,"%s: %s",filename,strerror(errno));
+luaL_argerror(L,arg,lua_tostring(L,-1));
+}
+#define tofilep(L)((FILE**)luaL_checkudata(L,1,"FILE*"))
+static int io_type(lua_State*L){
+void*ud;
+luaL_checkany(L,1);
+ud=lua_touserdata(L,1);
+lua_getfield(L,(-10000),"FILE*");
+if(ud==NULL||!lua_getmetatable(L,1)||!lua_rawequal(L,-2,-1))
+lua_pushnil(L);
+else if(*((FILE**)ud)==NULL)
+lua_pushliteral(L,"closed file");
+else
+lua_pushliteral(L,"file");
+return 1;
+}
+static FILE*tofile(lua_State*L){
+FILE**f=tofilep(L);
+if(*f==NULL)
+luaL_error(L,"attempt to use a closed file");
+return*f;
+}
+static FILE**newfile(lua_State*L){
+FILE**pf=(FILE**)lua_newuserdata(L,sizeof(FILE*));
+*pf=NULL;
+luaL_getmetatable(L,"FILE*");
+lua_setmetatable(L,-2);
+return pf;
+}
+static int io_noclose(lua_State*L){
+lua_pushnil(L);
+lua_pushliteral(L,"cannot close standard file");
+return 2;
+}
+static int io_pclose(lua_State*L){
+FILE**p=tofilep(L);
+int ok=lua_pclose(L,*p);
+*p=NULL;
+return pushresult(L,ok,NULL);
+}
+static int io_fclose(lua_State*L){
+FILE**p=tofilep(L);
+int ok=(fclose(*p)==0);
+*p=NULL;
+return pushresult(L,ok,NULL);
+}
+static int aux_close(lua_State*L){
+lua_getfenv(L,1);
+lua_getfield(L,-1,"__close");
+return(lua_tocfunction(L,-1))(L);
+}
+static int io_close(lua_State*L){
+if(lua_isnone(L,1))
+lua_rawgeti(L,(-10001),2);
+tofile(L);
+return aux_close(L);
+}
+static int io_gc(lua_State*L){
+FILE*f=*tofilep(L);
+if(f!=NULL)
+aux_close(L);
+return 0;
+}
+static int io_open(lua_State*L){
+const char*filename=luaL_checkstring(L,1);
+const char*mode=luaL_optstring(L,2,"r");
+FILE**pf=newfile(L);
+*pf=fopen(filename,mode);
+return(*pf==NULL)?pushresult(L,0,filename):1;
+}
+static FILE*getiofile(lua_State*L,int findex){
+FILE*f;
+lua_rawgeti(L,(-10001),findex);
+f=*(FILE**)lua_touserdata(L,-1);
+if(f==NULL)
+luaL_error(L,"standard %s file is closed",fnames[findex-1]);
+return f;
+}
+static int g_iofile(lua_State*L,int f,const char*mode){
+if(!lua_isnoneornil(L,1)){
+const char*filename=lua_tostring(L,1);
+if(filename){
+FILE**pf=newfile(L);
+*pf=fopen(filename,mode);
+if(*pf==NULL)
+fileerror(L,1,filename);
+}
+else{
+tofile(L);
+lua_pushvalue(L,1);
+}
+lua_rawseti(L,(-10001),f);
+}
+lua_rawgeti(L,(-10001),f);
+return 1;
+}
+static int io_input(lua_State*L){
+return g_iofile(L,1,"r");
+}
+static int io_output(lua_State*L){
+return g_iofile(L,2,"w");
+}
+static int io_readline(lua_State*L);
+static void aux_lines(lua_State*L,int idx,int toclose){
+lua_pushvalue(L,idx);
+lua_pushboolean(L,toclose);
+lua_pushcclosure(L,io_readline,2);
+}
+static int f_lines(lua_State*L){
+tofile(L);
+aux_lines(L,1,0);
+return 1;
+}
+static int io_lines(lua_State*L){
+if(lua_isnoneornil(L,1)){
+lua_rawgeti(L,(-10001),1);
+return f_lines(L);
+}
+else{
+const char*filename=luaL_checkstring(L,1);
+FILE**pf=newfile(L);
+*pf=fopen(filename,"r");
+if(*pf==NULL)
+fileerror(L,1,filename);
+aux_lines(L,lua_gettop(L),1);
+return 1;
+}
+}
+static int read_number(lua_State*L,FILE*f){
+lua_Number d;
+if(fscanf(f,"%lf",&d)==1){
+lua_pushnumber(L,d);
+return 1;
+}
+else{
+lua_pushnil(L);
+return 0;
+}
+}
+static int test_eof(lua_State*L,FILE*f){
+int c=getc(f);
+ungetc(c,f);
+lua_pushlstring(L,NULL,0);
+return(c!=EOF);
+}
+static int read_line(lua_State*L,FILE*f){
+luaL_Buffer b;
+luaL_buffinit(L,&b);
+for(;;){
+size_t l;
+char*p=luaL_prepbuffer(&b);
+if(fgets(p,BUFSIZ,f)==NULL){
+luaL_pushresult(&b);
+return(lua_objlen(L,-1)>0);
+}
+l=strlen(p);
+if(l==0||p[l-1]!='\n')
+luaL_addsize(&b,l);
+else{
+luaL_addsize(&b,l-1);
+luaL_pushresult(&b);
+return 1;
+}
+}
+}
+static int read_chars(lua_State*L,FILE*f,size_t n){
+size_t rlen;
+size_t nr;
+luaL_Buffer b;
+luaL_buffinit(L,&b);
+rlen=BUFSIZ;
+do{
+char*p=luaL_prepbuffer(&b);
+if(rlen>n)rlen=n;
+nr=fread(p,sizeof(char),rlen,f);
+luaL_addsize(&b,nr);
+n-=nr;
+}while(n>0&&nr==rlen);
+luaL_pushresult(&b);
+return(n==0||lua_objlen(L,-1)>0);
+}
+static int g_read(lua_State*L,FILE*f,int first){
+int nargs=lua_gettop(L)-1;
+int success;
+int n;
+clearerr(f);
+if(nargs==0){
+success=read_line(L,f);
+n=first+1;
+}
+else{
+luaL_checkstack(L,nargs+20,"too many arguments");
+success=1;
+for(n=first;nargs--&&success;n++){
+if(lua_type(L,n)==3){
+size_t l=(size_t)lua_tointeger(L,n);
+success=(l==0)?test_eof(L,f):read_chars(L,f,l);
+}
+else{
+const char*p=lua_tostring(L,n);
+luaL_argcheck(L,p&&p[0]=='*',n,"invalid option");
+switch(p[1]){
+case'n':
+success=read_number(L,f);
+break;
+case'l':
+success=read_line(L,f);
+break;
+case'a':
+read_chars(L,f,~((size_t)0));
+success=1;
+break;
+default:
+return luaL_argerror(L,n,"invalid format");
+}
+}
+}
+}
+if(ferror(f))
+return pushresult(L,0,NULL);
+if(!success){
+lua_pop(L,1);
+lua_pushnil(L);
+}
+return n-first;
+}
+static int io_read(lua_State*L){
+return g_read(L,getiofile(L,1),1);
+}
+static int f_read(lua_State*L){
+return g_read(L,tofile(L),2);
+}
+static int io_readline(lua_State*L){
+FILE*f=*(FILE**)lua_touserdata(L,lua_upvalueindex(1));
+int sucess;
+if(f==NULL)
+luaL_error(L,"file is already closed");
+sucess=read_line(L,f);
+if(ferror(f))
+return luaL_error(L,"%s",strerror(errno));
+if(sucess)return 1;
+else{
+if(lua_toboolean(L,lua_upvalueindex(2))){
+lua_settop(L,0);
+lua_pushvalue(L,lua_upvalueindex(1));
+aux_close(L);
+}
+return 0;
+}
+}
+static int g_write(lua_State*L,FILE*f,int arg){
+int nargs=lua_gettop(L)-1;
+int status=1;
+for(;nargs--;arg++){
+if(lua_type(L,arg)==3){
+status=status&&
+fprintf(f,"%.14g",lua_tonumber(L,arg))>0;
+}
+else{
+size_t l;
+const char*s=luaL_checklstring(L,arg,&l);
+status=status&&(fwrite(s,sizeof(char),l,f)==l);
+}
+}
+return pushresult(L,status,NULL);
+}
+static int io_write(lua_State*L){
+return g_write(L,getiofile(L,2),1);
+}
+static int f_write(lua_State*L){
+return g_write(L,tofile(L),2);
+}
+static int io_flush(lua_State*L){
+return pushresult(L,fflush(getiofile(L,2))==0,NULL);
+}
+static int f_flush(lua_State*L){
+return pushresult(L,fflush(tofile(L))==0,NULL);
+}
+static const luaL_Reg iolib[]={
+{"close",io_close},
+{"flush",io_flush},
+{"input",io_input},
+{"lines",io_lines},
+{"open",io_open},
+{"output",io_output},
+{"read",io_read},
+{"type",io_type},
+{"write",io_write},
+{NULL,NULL}
+};
+static const luaL_Reg flib[]={
+{"close",io_close},
+{"flush",f_flush},
+{"lines",f_lines},
+{"read",f_read},
+{"write",f_write},
+{"__gc",io_gc},
+{NULL,NULL}
+};
+static void createmeta(lua_State*L){
+luaL_newmetatable(L,"FILE*");
+lua_pushvalue(L,-1);
+lua_setfield(L,-2,"__index");
+luaL_register(L,NULL,flib);
+}
+static void createstdfile(lua_State*L,FILE*f,int k,const char*fname){
+*newfile(L)=f;
+if(k>0){
+lua_pushvalue(L,-1);
+lua_rawseti(L,(-10001),k);
+}
+lua_pushvalue(L,-2);
+lua_setfenv(L,-2);
+lua_setfield(L,-3,fname);
+}
+static void newfenv(lua_State*L,lua_CFunction cls){
+lua_createtable(L,0,1);
+lua_pushcfunction(L,cls);
+lua_setfield(L,-2,"__close");
+}
+static int luaopen_io(lua_State*L){
+createmeta(L);
+newfenv(L,io_fclose);
+lua_replace(L,(-10001));
+luaL_register(L,"io",iolib);
+newfenv(L,io_noclose);
+createstdfile(L,stdin,1,"stdin");
+createstdfile(L,stdout,2,"stdout");
+createstdfile(L,stderr,0,"stderr");
+lua_pop(L,1);
+lua_getfield(L,-1,"popen");
+newfenv(L,io_pclose);
+lua_setfenv(L,-2);
+lua_pop(L,1);
+return 1;
+}
+static int os_pushresult(lua_State*L,int i,const char*filename){
+int en=errno;
+if(i){
+lua_pushboolean(L,1);
+return 1;
+}
+else{
+lua_pushnil(L);
+lua_pushfstring(L,"%s: %s",filename,strerror(en));
+lua_pushinteger(L,en);
+return 3;
+}
+}
+static int os_remove(lua_State*L){
+const char*filename=luaL_checkstring(L,1);
+return os_pushresult(L,remove(filename)==0,filename);
+}
+static int os_exit(lua_State*L){
+exit(luaL_optint(L,1,EXIT_SUCCESS));
+}
+static const luaL_Reg syslib[]={
+{"exit",os_exit},
+{"remove",os_remove},
+{NULL,NULL}
+};
+static int luaopen_os(lua_State*L){
+luaL_register(L,"os",syslib);
+return 1;
+}
+#define uchar(c)((unsigned char)(c))
+static ptrdiff_t posrelat(ptrdiff_t pos,size_t len){
+if(pos<0)pos+=(ptrdiff_t)len+1;
+return(pos>=0)?pos:0;
+}
+static int str_sub(lua_State*L){
+size_t l;
+const char*s=luaL_checklstring(L,1,&l);
+ptrdiff_t start=posrelat(luaL_checkinteger(L,2),l);
+ptrdiff_t end=posrelat(luaL_optinteger(L,3,-1),l);
+if(start<1)start=1;
+if(end>(ptrdiff_t)l)end=(ptrdiff_t)l;
+if(start<=end)
+lua_pushlstring(L,s+start-1,end-start+1);
+else lua_pushliteral(L,"");
+return 1;
+}
+static int str_lower(lua_State*L){
+size_t l;
+size_t i;
+luaL_Buffer b;
+const char*s=luaL_checklstring(L,1,&l);
+luaL_buffinit(L,&b);
+for(i=0;i<l;i++)
+luaL_addchar(&b,tolower(uchar(s[i])));
+luaL_pushresult(&b);
+return 1;
+}
+static int str_upper(lua_State*L){
+size_t l;
+size_t i;
+luaL_Buffer b;
+const char*s=luaL_checklstring(L,1,&l);
+luaL_buffinit(L,&b);
+for(i=0;i<l;i++)
+luaL_addchar(&b,toupper(uchar(s[i])));
+luaL_pushresult(&b);
+return 1;
+}
+static int str_rep(lua_State*L){
+size_t l;
+luaL_Buffer b;
+const char*s=luaL_checklstring(L,1,&l);
+int n=luaL_checkint(L,2);
+luaL_buffinit(L,&b);
+while(n-->0)
+luaL_addlstring(&b,s,l);
+luaL_pushresult(&b);
+return 1;
+}
+static int str_byte(lua_State*L){
+size_t l;
+const char*s=luaL_checklstring(L,1,&l);
+ptrdiff_t posi=posrelat(luaL_optinteger(L,2,1),l);
+ptrdiff_t pose=posrelat(luaL_optinteger(L,3,posi),l);
+int n,i;
+if(posi<=0)posi=1;
+if((size_t)pose>l)pose=l;
+if(posi>pose)return 0;
+n=(int)(pose-posi+1);
+if(posi+n<=pose)
+luaL_error(L,"string slice too long");
+luaL_checkstack(L,n,"string slice too long");
+for(i=0;i<n;i++)
+lua_pushinteger(L,uchar(s[posi+i-1]));
+return n;
+}
+static int str_char(lua_State*L){
+int n=lua_gettop(L);
+int i;
+luaL_Buffer b;
+luaL_buffinit(L,&b);
+for(i=1;i<=n;i++){
+int c=luaL_checkint(L,i);
+luaL_argcheck(L,uchar(c)==c,i,"invalid value");
+luaL_addchar(&b,uchar(c));
+}
+luaL_pushresult(&b);
+return 1;
+}
+typedef struct MatchState{
+const char*src_init;
+const char*src_end;
+lua_State*L;
+int level;
+struct{
+const char*init;
+ptrdiff_t len;
+}capture[32];
+}MatchState;
+static int check_capture(MatchState*ms,int l){
+l-='1';
+if(l<0||l>=ms->level||ms->capture[l].len==(-1))
+return luaL_error(ms->L,"invalid capture index");
+return l;
+}
+static int capture_to_close(MatchState*ms){
+int level=ms->level;
+for(level--;level>=0;level--)
+if(ms->capture[level].len==(-1))return level;
+return luaL_error(ms->L,"invalid pattern capture");
+}
+static const char*classend(MatchState*ms,const char*p){
+switch(*p++){
+case'%':{
+if(*p=='\0')
+luaL_error(ms->L,"malformed pattern (ends with "LUA_QL("%%")")");
+return p+1;
+}
+case'[':{
+if(*p=='^')p++;
+do{
+if(*p=='\0')
+luaL_error(ms->L,"malformed pattern (missing "LUA_QL("]")")");
+if(*(p++)=='%'&&*p!='\0')
+p++;
+}while(*p!=']');
+return p+1;
+}
+default:{
+return p;
+}
+}
+}
+static int match_class(int c,int cl){
+int res;
+switch(tolower(cl)){
+case'a':res=isalpha(c);break;
+case'c':res=iscntrl(c);break;
+case'd':res=isdigit(c);break;
+case'l':res=islower(c);break;
+case'p':res=ispunct(c);break;
+case's':res=isspace(c);break;
+case'u':res=isupper(c);break;
+case'w':res=isalnum(c);break;
+case'x':res=isxdigit(c);break;
+case'z':res=(c==0);break;
+default:return(cl==c);
+}
+return(islower(cl)?res:!res);
+}
+static int matchbracketclass(int c,const char*p,const char*ec){
+int sig=1;
+if(*(p+1)=='^'){
+sig=0;
+p++;
+}
+while(++p<ec){
+if(*p=='%'){
+p++;
+if(match_class(c,uchar(*p)))
+return sig;
+}
+else if((*(p+1)=='-')&&(p+2<ec)){
+p+=2;
+if(uchar(*(p-2))<=c&&c<=uchar(*p))
+return sig;
+}
+else if(uchar(*p)==c)return sig;
+}
+return!sig;
+}
+static int singlematch(int c,const char*p,const char*ep){
+switch(*p){
+case'.':return 1;
+case'%':return match_class(c,uchar(*(p+1)));
+case'[':return matchbracketclass(c,p,ep-1);
+default:return(uchar(*p)==c);
+}
+}
+static const char*match(MatchState*ms,const char*s,const char*p);
+static const char*matchbalance(MatchState*ms,const char*s,
+const char*p){
+if(*p==0||*(p+1)==0)
+luaL_error(ms->L,"unbalanced pattern");
+if(*s!=*p)return NULL;
+else{
+int b=*p;
+int e=*(p+1);
+int cont=1;
+while(++s<ms->src_end){
+if(*s==e){
+if(--cont==0)return s+1;
+}
+else if(*s==b)cont++;
+}
+}
+return NULL;
+}
+static const char*max_expand(MatchState*ms,const char*s,
+const char*p,const char*ep){
+ptrdiff_t i=0;
+while((s+i)<ms->src_end&&singlematch(uchar(*(s+i)),p,ep))
+i++;
+while(i>=0){
+const char*res=match(ms,(s+i),ep+1);
+if(res)return res;
+i--;
+}
+return NULL;
+}
+static const char*min_expand(MatchState*ms,const char*s,
+const char*p,const char*ep){
+for(;;){
+const char*res=match(ms,s,ep+1);
+if(res!=NULL)
+return res;
+else if(s<ms->src_end&&singlematch(uchar(*s),p,ep))
+s++;
+else return NULL;
+}
+}
+static const char*start_capture(MatchState*ms,const char*s,
+const char*p,int what){
+const char*res;
+int level=ms->level;
+if(level>=32)luaL_error(ms->L,"too many captures");
+ms->capture[level].init=s;
+ms->capture[level].len=what;
+ms->level=level+1;
+if((res=match(ms,s,p))==NULL)
+ms->level--;
+return res;
+}
+static const char*end_capture(MatchState*ms,const char*s,
+const char*p){
+int l=capture_to_close(ms);
+const char*res;
+ms->capture[l].len=s-ms->capture[l].init;
+if((res=match(ms,s,p))==NULL)
+ms->capture[l].len=(-1);
+return res;
+}
+static const char*match_capture(MatchState*ms,const char*s,int l){
+size_t len;
+l=check_capture(ms,l);
+len=ms->capture[l].len;
+if((size_t)(ms->src_end-s)>=len&&
+memcmp(ms->capture[l].init,s,len)==0)
+return s+len;
+else return NULL;
+}
+static const char*match(MatchState*ms,const char*s,const char*p){
+init:
+switch(*p){
+case'(':{
+if(*(p+1)==')')
+return start_capture(ms,s,p+2,(-2));
+else
+return start_capture(ms,s,p+1,(-1));
+}
+case')':{
+return end_capture(ms,s,p+1);
+}
+case'%':{
+switch(*(p+1)){
+case'b':{
+s=matchbalance(ms,s,p+2);
+if(s==NULL)return NULL;
+p+=4;goto init;
+}
+case'f':{
+const char*ep;char previous;
+p+=2;
+if(*p!='[')
+luaL_error(ms->L,"missing "LUA_QL("[")" after "
+LUA_QL("%%f")" in pattern");
+ep=classend(ms,p);
+previous=(s==ms->src_init)?'\0':*(s-1);
+if(matchbracketclass(uchar(previous),p,ep-1)||
+!matchbracketclass(uchar(*s),p,ep-1))return NULL;
+p=ep;goto init;
+}
+default:{
+if(isdigit(uchar(*(p+1)))){
+s=match_capture(ms,s,uchar(*(p+1)));
+if(s==NULL)return NULL;
+p+=2;goto init;
+}
+goto dflt;
+}
+}
+}
+case'\0':{
+return s;
+}
+case'$':{
+if(*(p+1)=='\0')
+return(s==ms->src_end)?s:NULL;
+else goto dflt;
+}
+default:dflt:{
+const char*ep=classend(ms,p);
+int m=s<ms->src_end&&singlematch(uchar(*s),p,ep);
+switch(*ep){
+case'?':{
+const char*res;
+if(m&&((res=match(ms,s+1,ep+1))!=NULL))
+return res;
+p=ep+1;goto init;
+}
+case'*':{
+return max_expand(ms,s,p,ep);
+}
+case'+':{
+return(m?max_expand(ms,s+1,p,ep):NULL);
+}
+case'-':{
+return min_expand(ms,s,p,ep);
+}
+default:{
+if(!m)return NULL;
+s++;p=ep;goto init;
+}
+}
+}
+}
+}
+static const char*lmemfind(const char*s1,size_t l1,
+const char*s2,size_t l2){
+if(l2==0)return s1;
+else if(l2>l1)return NULL;
+else{
+const char*init;
+l2--;
+l1=l1-l2;
+while(l1>0&&(init=(const char*)memchr(s1,*s2,l1))!=NULL){
+init++;
+if(memcmp(init,s2+1,l2)==0)
+return init-1;
+else{
+l1-=init-s1;
+s1=init;
+}
+}
+return NULL;
+}
+}
+static void push_onecapture(MatchState*ms,int i,const char*s,
+const char*e){
+if(i>=ms->level){
+if(i==0)
+lua_pushlstring(ms->L,s,e-s);
+else
+luaL_error(ms->L,"invalid capture index");
+}
+else{
+ptrdiff_t l=ms->capture[i].len;
+if(l==(-1))luaL_error(ms->L,"unfinished capture");
+if(l==(-2))
+lua_pushinteger(ms->L,ms->capture[i].init-ms->src_init+1);
+else
+lua_pushlstring(ms->L,ms->capture[i].init,l);
+}
+}
+static int push_captures(MatchState*ms,const char*s,const char*e){
+int i;
+int nlevels=(ms->level==0&&s)?1:ms->level;
+luaL_checkstack(ms->L,nlevels,"too many captures");
+for(i=0;i<nlevels;i++)
+push_onecapture(ms,i,s,e);
+return nlevels;
+}
+static int str_find_aux(lua_State*L,int find){
+size_t l1,l2;
+const char*s=luaL_checklstring(L,1,&l1);
+const char*p=luaL_checklstring(L,2,&l2);
+ptrdiff_t init=posrelat(luaL_optinteger(L,3,1),l1)-1;
+if(init<0)init=0;
+else if((size_t)(init)>l1)init=(ptrdiff_t)l1;
+if(find&&(lua_toboolean(L,4)||
+strpbrk(p,"^$*+?.([%-")==NULL)){
+const char*s2=lmemfind(s+init,l1-init,p,l2);
+if(s2){
+lua_pushinteger(L,s2-s+1);
+lua_pushinteger(L,s2-s+l2);
+return 2;
+}
+}
+else{
+MatchState ms;
+int anchor=(*p=='^')?(p++,1):0;
+const char*s1=s+init;
+ms.L=L;
+ms.src_init=s;
+ms.src_end=s+l1;
+do{
+const char*res;
+ms.level=0;
+if((res=match(&ms,s1,p))!=NULL){
+if(find){
+lua_pushinteger(L,s1-s+1);
+lua_pushinteger(L,res-s);
+return push_captures(&ms,NULL,0)+2;
+}
+else
+return push_captures(&ms,s1,res);
+}
+}while(s1++<ms.src_end&&!anchor);
+}
+lua_pushnil(L);
+return 1;
+}
+static int str_find(lua_State*L){
+return str_find_aux(L,1);
+}
+static int str_match(lua_State*L){
+return str_find_aux(L,0);
+}
+static int gmatch_aux(lua_State*L){
+MatchState ms;
+size_t ls;
+const char*s=lua_tolstring(L,lua_upvalueindex(1),&ls);
+const char*p=lua_tostring(L,lua_upvalueindex(2));
+const char*src;
+ms.L=L;
+ms.src_init=s;
+ms.src_end=s+ls;
+for(src=s+(size_t)lua_tointeger(L,lua_upvalueindex(3));
+src<=ms.src_end;
+src++){
+const char*e;
+ms.level=0;
+if((e=match(&ms,src,p))!=NULL){
+lua_Integer newstart=e-s;
+if(e==src)newstart++;
+lua_pushinteger(L,newstart);
+lua_replace(L,lua_upvalueindex(3));
+return push_captures(&ms,src,e);
+}
+}
+return 0;
+}
+static int gmatch(lua_State*L){
+luaL_checkstring(L,1);
+luaL_checkstring(L,2);
+lua_settop(L,2);
+lua_pushinteger(L,0);
+lua_pushcclosure(L,gmatch_aux,3);
+return 1;
+}
+static void add_s(MatchState*ms,luaL_Buffer*b,const char*s,
+const char*e){
+size_t l,i;
+const char*news=lua_tolstring(ms->L,3,&l);
+for(i=0;i<l;i++){
+if(news[i]!='%')
+luaL_addchar(b,news[i]);
+else{
+i++;
+if(!isdigit(uchar(news[i])))
+luaL_addchar(b,news[i]);
+else if(news[i]=='0')
+luaL_addlstring(b,s,e-s);
+else{
+push_onecapture(ms,news[i]-'1',s,e);
+luaL_addvalue(b);
+}
+}
+}
+}
+static void add_value(MatchState*ms,luaL_Buffer*b,const char*s,
+const char*e){
+lua_State*L=ms->L;
+switch(lua_type(L,3)){
+case 3:
+case 4:{
+add_s(ms,b,s,e);
+return;
+}
+case 6:{
+int n;
+lua_pushvalue(L,3);
+n=push_captures(ms,s,e);
+lua_call(L,n,1);
+break;
+}
+case 5:{
+push_onecapture(ms,0,s,e);
+lua_gettable(L,3);
+break;
+}
+}
+if(!lua_toboolean(L,-1)){
+lua_pop(L,1);
+lua_pushlstring(L,s,e-s);
+}
+else if(!lua_isstring(L,-1))
+luaL_error(L,"invalid replacement value (a %s)",luaL_typename(L,-1));
+luaL_addvalue(b);
+}
+static int str_gsub(lua_State*L){
+size_t srcl;
+const char*src=luaL_checklstring(L,1,&srcl);
+const char*p=luaL_checkstring(L,2);
+int tr=lua_type(L,3);
+int max_s=luaL_optint(L,4,srcl+1);
+int anchor=(*p=='^')?(p++,1):0;
+int n=0;
+MatchState ms;
+luaL_Buffer b;
+luaL_argcheck(L,tr==3||tr==4||
+tr==6||tr==5,3,
+"string/function/table expected");
+luaL_buffinit(L,&b);
+ms.L=L;
+ms.src_init=src;
+ms.src_end=src+srcl;
+while(n<max_s){
+const char*e;
+ms.level=0;
+e=match(&ms,src,p);
+if(e){
+n++;
+add_value(&ms,&b,src,e);
+}
+if(e&&e>src)
+src=e;
+else if(src<ms.src_end)
+luaL_addchar(&b,*src++);
+else break;
+if(anchor)break;
+}
+luaL_addlstring(&b,src,ms.src_end-src);
+luaL_pushresult(&b);
+lua_pushinteger(L,n);
+return 2;
+}
+static void addquoted(lua_State*L,luaL_Buffer*b,int arg){
+size_t l;
+const char*s=luaL_checklstring(L,arg,&l);
+luaL_addchar(b,'"');
+while(l--){
+switch(*s){
+case'"':case'\\':case'\n':{
+luaL_addchar(b,'\\');
+luaL_addchar(b,*s);
+break;
+}
+case'\r':{
+luaL_addlstring(b,"\\r",2);
+break;
+}
+case'\0':{
+luaL_addlstring(b,"\\000",4);
+break;
+}
+default:{
+luaL_addchar(b,*s);
+break;
+}
+}
+s++;
+}
+luaL_addchar(b,'"');
+}
+static const char*scanformat(lua_State*L,const char*strfrmt,char*form){
+const char*p=strfrmt;
+while(*p!='\0'&&strchr("-+ #0",*p)!=NULL)p++;
+if((size_t)(p-strfrmt)>=sizeof("-+ #0"))
+luaL_error(L,"invalid format (repeated flags)");
+if(isdigit(uchar(*p)))p++;
+if(isdigit(uchar(*p)))p++;
+if(*p=='.'){
+p++;
+if(isdigit(uchar(*p)))p++;
+if(isdigit(uchar(*p)))p++;
+}
+if(isdigit(uchar(*p)))
+luaL_error(L,"invalid format (width or precision too long)");
+*(form++)='%';
+strncpy(form,strfrmt,p-strfrmt+1);
+form+=p-strfrmt+1;
+*form='\0';
+return p;
+}
+static void addintlen(char*form){
+size_t l=strlen(form);
+char spec=form[l-1];
+strcpy(form+l-1,"l");
+form[l+sizeof("l")-2]=spec;
+form[l+sizeof("l")-1]='\0';
+}
+static int str_format(lua_State*L){
+int top=lua_gettop(L);
+int arg=1;
+size_t sfl;
+const char*strfrmt=luaL_checklstring(L,arg,&sfl);
+const char*strfrmt_end=strfrmt+sfl;
+luaL_Buffer b;
+luaL_buffinit(L,&b);
+while(strfrmt<strfrmt_end){
+if(*strfrmt!='%')
+luaL_addchar(&b,*strfrmt++);
+else if(*++strfrmt=='%')
+luaL_addchar(&b,*strfrmt++);
+else{
+char form[(sizeof("-+ #0")+sizeof("l")+10)];
+char buff[512];
+if(++arg>top)
+luaL_argerror(L,arg,"no value");
+strfrmt=scanformat(L,strfrmt,form);
+switch(*strfrmt++){
+case'c':{
+sprintf(buff,form,(int)luaL_checknumber(L,arg));
+break;
+}
+case'd':case'i':{
+addintlen(form);
+sprintf(buff,form,(long)luaL_checknumber(L,arg));
+break;
+}
+case'o':case'u':case'x':case'X':{
+addintlen(form);
+sprintf(buff,form,(unsigned long)luaL_checknumber(L,arg));
+break;
+}
+case'e':case'E':case'f':
+case'g':case'G':{
+sprintf(buff,form,(double)luaL_checknumber(L,arg));
+break;
+}
+case'q':{
+addquoted(L,&b,arg);
+continue;
+}
+case's':{
+size_t l;
+const char*s=luaL_checklstring(L,arg,&l);
+if(!strchr(form,'.')&&l>=100){
+lua_pushvalue(L,arg);
+luaL_addvalue(&b);
+continue;
+}
+else{
+sprintf(buff,form,s);
+break;
+}
+}
+default:{
+return luaL_error(L,"invalid option "LUA_QL("%%%c")" to "
+LUA_QL("format"),*(strfrmt-1));
+}
+}
+luaL_addlstring(&b,buff,strlen(buff));
+}
+}
+luaL_pushresult(&b);
+return 1;
+}
+static const luaL_Reg strlib[]={
+{"byte",str_byte},
+{"char",str_char},
+{"find",str_find},
+{"format",str_format},
+{"gmatch",gmatch},
+{"gsub",str_gsub},
+{"lower",str_lower},
+{"match",str_match},
+{"rep",str_rep},
+{"sub",str_sub},
+{"upper",str_upper},
+{NULL,NULL}
+};
+static void createmetatable(lua_State*L){
+lua_createtable(L,0,1);
+lua_pushliteral(L,"");
+lua_pushvalue(L,-2);
+lua_setmetatable(L,-2);
+lua_pop(L,1);
+lua_pushvalue(L,-2);
+lua_setfield(L,-2,"__index");
+lua_pop(L,1);
+}
+static int luaopen_string(lua_State*L){
+luaL_register(L,"string",strlib);
+createmetatable(L);
+return 1;
+}
+static const luaL_Reg lualibs[]={
+{"",luaopen_base},
+{"table",luaopen_table},
+{"io",luaopen_io},
+{"os",luaopen_os},
+{"string",luaopen_string},
+{NULL,NULL}
+};
+static void luaL_openlibs(lua_State*L){
+const luaL_Reg*lib=lualibs;
+for(;lib->func;lib++){
+lua_pushcfunction(L,lib->func);
+lua_pushstring(L,lib->name);
+lua_call(L,1,0);
+}
+}
+typedef unsigned int UB;
+static UB barg(lua_State*L,int idx){
+union{lua_Number n;U64 b;}bn;
+bn.n=lua_tonumber(L,idx)+6755399441055744.0;
+if(bn.n==0.0&&!lua_isnumber(L,idx))luaL_typerror(L,idx,"number");
+return(UB)bn.b;
+}
+#define BRET(b)lua_pushnumber(L,(lua_Number)(int)(b));return 1;
+static int tobit(lua_State*L){
+BRET(barg(L,1))}
+static int bnot(lua_State*L){
+BRET(~barg(L,1))}
+static int band(lua_State*L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b&=barg(L,i);BRET(b)}
+static int bor(lua_State*L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b|=barg(L,i);BRET(b)}
+static int bxor(lua_State*L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b^=barg(L,i);BRET(b)}
+static int lshift(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET(b<<n)}
+static int rshift(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET(b>>n)}
+static int arshift(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((int)b>>n)}
+static int rol(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((b<<n)|(b>>(32-n)))}
+static int ror(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((b>>n)|(b<<(32-n)))}
+static int bswap(lua_State*L){
+UB b=barg(L,1);b=(b>>24)|((b>>8)&0xff00)|((b&0xff00)<<8)|(b<<24);BRET(b)}
+static int tohex(lua_State*L){
+UB b=barg(L,1);
+int n=lua_isnone(L,2)?8:(int)barg(L,2);
+const char*hexdigits="0123456789abcdef";
+char buf[8];
+int i;
+if(n<0){n=-n;hexdigits="0123456789ABCDEF";}
+if(n>8)n=8;
+for(i=(int)n;--i>=0;){buf[i]=hexdigits[b&15];b>>=4;}
+lua_pushlstring(L,buf,(size_t)n);
+return 1;
+}
+static const struct luaL_Reg bitlib[]={
+{"tobit",tobit},
+{"bnot",bnot},
+{"band",band},
+{"bor",bor},
+{"bxor",bxor},
+{"lshift",lshift},
+{"rshift",rshift},
+{"arshift",arshift},
+{"rol",rol},
+{"ror",ror},
+{"bswap",bswap},
+{"tohex",tohex},
+{NULL,NULL}
+};
+int main(int argc,char**argv){
+lua_State*L=luaL_newstate();
+int i;
+luaL_openlibs(L);
+luaL_register(L,"bit",bitlib);
+if(argc<2)return sizeof(void*);
+lua_createtable(L,0,1);
+lua_pushstring(L,argv[1]);
+lua_rawseti(L,-2,0);
+lua_setglobal(L,"arg");
+if(luaL_loadfile(L,argv[1]))
+goto err;
+for(i=2;i<argc;i++)
+lua_pushstring(L,argv[i]);
+if(lua_pcall(L,argc-2,0,0)){
+err:
+fprintf(stderr,"Error: %s\n",lua_tostring(L,-1));
+return 1;
+}
+lua_close(L);
+return 0;
+}

+ 191 - 0
luajit.mod/luajit/src/jit/bc.lua

@@ -0,0 +1,191 @@
+----------------------------------------------------------------------------
+-- LuaJIT bytecode listing module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module lists the bytecode of a Lua function. If it's loaded by -jbc
+-- it hooks into the parser and lists all functions of a chunk as they
+-- are parsed.
+--
+-- Example usage:
+--
+--   luajit -jbc -e 'local x=0; for i=1,1e6 do x=x+i end; print(x)'
+--   luajit -jbc=- foo.lua
+--   luajit -jbc=foo.list foo.lua
+--
+-- Default output is to stderr. To redirect the output to a file, pass a
+-- filename as an argument (use '-' for stdout) or set the environment
+-- variable LUAJIT_LISTFILE. The file is overwritten every time the module
+-- is started.
+--
+-- This module can also be used programmatically:
+--
+--   local bc = require("jit.bc")
+--
+--   local function foo() print("hello") end
+--
+--   bc.dump(foo)           --> -- BYTECODE -- [...]
+--   print(bc.line(foo, 2)) --> 0002    KSTR     1   1      ; "hello"
+--
+--   local out = {
+--     -- Do something with each line:
+--     write = function(t, ...) io.write(...) end,
+--     close = function(t) end,
+--     flush = function(t) end,
+--   }
+--   bc.dump(foo, out)
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local bit = require("bit")
+local sub, gsub, format = string.sub, string.gsub, string.format
+local byte, band, shr = string.byte, bit.band, bit.rshift
+local funcinfo, funcbc, funck = jutil.funcinfo, jutil.funcbc, jutil.funck
+local funcuvname = jutil.funcuvname
+local bcnames = vmdef.bcnames
+local stdout, stderr = io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+local function ctlsub(c)
+  if c == "\n" then return "\\n"
+  elseif c == "\r" then return "\\r"
+  elseif c == "\t" then return "\\t"
+  else return format("\\%03d", byte(c))
+  end
+end
+
+-- Return one bytecode line.
+local function bcline(func, pc, prefix)
+  local ins, m = funcbc(func, pc)
+  if not ins then return end
+  local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
+  local a = band(shr(ins, 8), 0xff)
+  local oidx = 6*band(ins, 0xff)
+  local op = sub(bcnames, oidx+1, oidx+6)
+  local s = format("%04d %s %-6s %3s ",
+    pc, prefix or "  ", op, ma == 0 and "" or a)
+  local d = shr(ins, 16)
+  if mc == 13*128 then -- BCMjump
+    return format("%s=> %04d\n", s, pc+d-0x7fff)
+  end
+  if mb ~= 0 then
+    d = band(d, 0xff)
+  elseif mc == 0 then
+    return s.."\n"
+  end
+  local kc
+  if mc == 10*128 then -- BCMstr
+    kc = funck(func, -d-1)
+    kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
+  elseif mc == 9*128 then -- BCMnum
+    kc = funck(func, d)
+    if op == "TSETM " then kc = kc - 2^52 end
+  elseif mc == 12*128 then -- BCMfunc
+    local fi = funcinfo(funck(func, -d-1))
+    if fi.ffid then
+      kc = vmdef.ffnames[fi.ffid]
+    else
+      kc = fi.loc
+    end
+  elseif mc == 5*128 then -- BCMuv
+    kc = funcuvname(func, d)
+  end
+  if ma == 5 then -- BCMuv
+    local ka = funcuvname(func, a)
+    if kc then kc = ka.." ; "..kc else kc = ka end
+  end
+  if mb ~= 0 then
+    local b = shr(ins, 24)
+    if kc then return format("%s%3d %3d  ; %s\n", s, b, d, kc) end
+    return format("%s%3d %3d\n", s, b, d)
+  end
+  if kc then return format("%s%3d      ; %s\n", s, d, kc) end
+  if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits
+  return format("%s%3d\n", s, d)
+end
+
+-- Collect branch targets of a function.
+local function bctargets(func)
+  local target = {}
+  for pc=1,1000000000 do
+    local ins, m = funcbc(func, pc)
+    if not ins then break end
+    if band(m, 15*128) == 13*128 then target[pc+shr(ins, 16)-0x7fff] = true end
+  end
+  return target
+end
+
+-- Dump bytecode instructions of a function.
+local function bcdump(func, out, all)
+  if not out then out = stdout end
+  local fi = funcinfo(func)
+  if all and fi.children then
+    for n=-1,-1000000000,-1 do
+      local k = funck(func, n)
+      if not k then break end
+      if type(k) == "proto" then bcdump(k, out, true) end
+    end
+  end
+  out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
+  local target = bctargets(func)
+  for pc=1,1000000000 do
+    local s = bcline(func, pc, target[pc] and "=>")
+    if not s then break end
+    out:write(s)
+  end
+  out:write("\n")
+  out:flush()
+end
+
+------------------------------------------------------------------------------
+
+-- Active flag and output file handle.
+local active, out
+
+-- List handler.
+local function h_list(func)
+  return bcdump(func, out)
+end
+
+-- Detach list handler.
+local function bclistoff()
+  if active then
+    active = false
+    jit.attach(h_list)
+    if out and out ~= stdout and out ~= stderr then out:close() end
+    out = nil
+  end
+end
+
+-- Open the output file and attach list handler.
+local function bcliston(outfile)
+  if active then bclistoff() end
+  if not outfile then outfile = os.getenv("LUAJIT_LISTFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stderr
+  end
+  jit.attach(h_list, "bc")
+  active = true
+end
+
+-- Public module functions.
+module(...)
+
+line = bcline
+dump = bcdump
+targets = bctargets
+
+on = bcliston
+off = bclistoff
+start = bcliston -- For -j command line option.
+

+ 659 - 0
luajit.mod/luajit/src/jit/bcsave.lua

@@ -0,0 +1,659 @@
+----------------------------------------------------------------------------
+-- LuaJIT module to save/list bytecode.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module saves or lists the bytecode for an input file.
+-- It's run by the -b command line option.
+--
+------------------------------------------------------------------------------
+
+local jit = require("jit")
+assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+local bit = require("bit")
+
+-- Symbol name prefix for LuaJIT bytecode.
+local LJBC_PREFIX = "luaJIT_BC_"
+
+------------------------------------------------------------------------------
+
+local function usage()
+  io.stderr:write[[
+Save LuaJIT bytecode: luajit -b[options] input output
+  -l        Only list bytecode.
+  -s        Strip debug info (default).
+  -g        Keep debug info.
+  -n name   Set module name (default: auto-detect from input name).
+  -t type   Set output file type (default: auto-detect from output name).
+  -a arch   Override architecture for object files (default: native).
+  -o os     Override OS for object files (default: native).
+  -e chunk  Use chunk string as input.
+  --        Stop handling options.
+  -         Use stdin as input and/or stdout as output.
+
+File types: c h obj o raw (default)
+]]
+  os.exit(1)
+end
+
+local function check(ok, ...)
+  if ok then return ok, ... end
+  io.stderr:write("luajit: ", ...)
+  io.stderr:write("\n")
+  os.exit(1)
+end
+
+local function readfile(input)
+  if type(input) == "function" then return input end
+  if input == "-" then input = nil end
+  return check(loadfile(input))
+end
+
+local function savefile(name, mode)
+  if name == "-" then return io.stdout end
+  return check(io.open(name, mode))
+end
+
+------------------------------------------------------------------------------
+
+local map_type = {
+  raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
+}
+
+local map_arch = {
+  x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
+  mips = true, mipsel = true,
+}
+
+local map_os = {
+  linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
+  openbsd = true, dragonfly = true, solaris = true,
+}
+
+local function checkarg(str, map, err)
+  str = string.lower(str)
+  local s = check(map[str], "unknown ", err)
+  return s == true and str or s
+end
+
+local function detecttype(str)
+  local ext = string.match(string.lower(str), "%.(%a+)$")
+  return map_type[ext] or "raw"
+end
+
+local function checkmodname(str)
+  check(string.match(str, "^[%w_.%-]+$"), "bad module name")
+  return string.gsub(str, "[%.%-]", "_")
+end
+
+local function detectmodname(str)
+  if type(str) == "string" then
+    local tail = string.match(str, "[^/\\]+$")
+    if tail then str = tail end
+    local head = string.match(str, "^(.*)%.[^.]*$")
+    if head then str = head end
+    str = string.match(str, "^[%w_.%-]+")
+  else
+    str = nil
+  end
+  check(str, "cannot derive module name, use -n name")
+  return string.gsub(str, "[%.%-]", "_")
+end
+
+------------------------------------------------------------------------------
+
+local function bcsave_tail(fp, output, s)
+  local ok, err = fp:write(s)
+  if ok and output ~= "-" then ok, err = fp:close() end
+  check(ok, "cannot write ", output, ": ", err)
+end
+
+local function bcsave_raw(output, s)
+  local fp = savefile(output, "wb")
+  bcsave_tail(fp, output, s)
+end
+
+local function bcsave_c(ctx, output, s)
+  local fp = savefile(output, "w")
+  if ctx.type == "c" then
+    fp:write(string.format([[
+#ifdef _cplusplus
+extern "C"
+#endif
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+const char %s%s[] = {
+]], LJBC_PREFIX, ctx.modname))
+  else
+    fp:write(string.format([[
+#define %s%s_SIZE %d
+static const char %s%s[] = {
+]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
+  end
+  local t, n, m = {}, 0, 0
+  for i=1,#s do
+    local b = tostring(string.byte(s, i))
+    m = m + #b + 1
+    if m > 78 then
+      fp:write(table.concat(t, ",", 1, n), ",\n")
+      n, m = 0, #b + 1
+    end
+    n = n + 1
+    t[n] = b
+  end
+  bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n")
+end
+
+local function bcsave_elfobj(ctx, output, s, ffi)
+  ffi.cdef[[
+typedef struct {
+  uint8_t emagic[4], eclass, eendian, eversion, eosabi, eabiversion, epad[7];
+  uint16_t type, machine;
+  uint32_t version;
+  uint32_t entry, phofs, shofs;
+  uint32_t flags;
+  uint16_t ehsize, phentsize, phnum, shentsize, shnum, shstridx;
+} ELF32header;
+typedef struct {
+  uint8_t emagic[4], eclass, eendian, eversion, eosabi, eabiversion, epad[7];
+  uint16_t type, machine;
+  uint32_t version;
+  uint64_t entry, phofs, shofs;
+  uint32_t flags;
+  uint16_t ehsize, phentsize, phnum, shentsize, shnum, shstridx;
+} ELF64header;
+typedef struct {
+  uint32_t name, type, flags, addr, ofs, size, link, info, align, entsize;
+} ELF32sectheader;
+typedef struct {
+  uint32_t name, type;
+  uint64_t flags, addr, ofs, size;
+  uint32_t link, info;
+  uint64_t align, entsize;
+} ELF64sectheader;
+typedef struct {
+  uint32_t name, value, size;
+  uint8_t info, other;
+  uint16_t sectidx;
+} ELF32symbol;
+typedef struct {
+  uint32_t name;
+  uint8_t info, other;
+  uint16_t sectidx;
+  uint64_t value, size;
+} ELF64symbol;
+typedef struct {
+  ELF32header hdr;
+  ELF32sectheader sect[6];
+  ELF32symbol sym[2];
+  uint8_t space[4096];
+} ELF32obj;
+typedef struct {
+  ELF64header hdr;
+  ELF64sectheader sect[6];
+  ELF64symbol sym[2];
+  uint8_t space[4096];
+} ELF64obj;
+]]
+  local symname = LJBC_PREFIX..ctx.modname
+  local is64, isbe = false, false
+  if ctx.arch == "x64" then
+    is64 = true
+  elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+    isbe = true
+  end
+
+  -- Handle different host/target endianess.
+  local function f32(x) return x end
+  local f16, fofs = f32, f32
+  if ffi.abi("be") ~= isbe then
+    f32 = bit.bswap
+    function f16(x) return bit.rshift(bit.bswap(x), 16) end
+    if is64 then
+      local two32 = ffi.cast("int64_t", 2^32)
+      function fofs(x) return bit.bswap(x)*two32 end
+    else
+      fofs = f32
+    end
+  end
+
+  -- Create ELF object and fill in header.
+  local o = ffi.new(is64 and "ELF64obj" or "ELF32obj")
+  local hdr = o.hdr
+  if ctx.os == "bsd" or ctx.os == "other" then -- Determine native hdr.eosabi.
+    local bf = assert(io.open("/bin/ls", "rb"))
+    local bs = bf:read(9)
+    bf:close()
+    ffi.copy(o, bs, 9)
+    check(hdr.emagic[0] == 127, "no support for writing native object files")
+  else
+    hdr.emagic = "\127ELF"
+    hdr.eosabi = ({ freebsd=9, netbsd=2, openbsd=12, solaris=6 })[ctx.os] or 0
+  end
+  hdr.eclass = is64 and 2 or 1
+  hdr.eendian = isbe and 2 or 1
+  hdr.eversion = 1
+  hdr.type = f16(1)
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+  if ctx.arch == "mips" or ctx.arch == "mipsel" then
+    hdr.flags = 0x50001006
+  end
+  hdr.version = f32(1)
+  hdr.shofs = fofs(ffi.offsetof(o, "sect"))
+  hdr.ehsize = f16(ffi.sizeof(hdr))
+  hdr.shentsize = f16(ffi.sizeof(o.sect[0]))
+  hdr.shnum = f16(6)
+  hdr.shstridx = f16(2)
+
+  -- Fill in sections and symbols.
+  local sofs, ofs = ffi.offsetof(o, "space"), 1
+  for i,name in ipairs{
+      ".symtab", ".shstrtab", ".strtab", ".rodata", ".note.GNU-stack",
+    } do
+    local sect = o.sect[i]
+    sect.align = fofs(1)
+    sect.name = f32(ofs)
+    ffi.copy(o.space+ofs, name)
+    ofs = ofs + #name+1
+  end
+  o.sect[1].type = f32(2) -- .symtab
+  o.sect[1].link = f32(3)
+  o.sect[1].info = f32(1)
+  o.sect[1].align = fofs(8)
+  o.sect[1].ofs = fofs(ffi.offsetof(o, "sym"))
+  o.sect[1].entsize = fofs(ffi.sizeof(o.sym[0]))
+  o.sect[1].size = fofs(ffi.sizeof(o.sym))
+  o.sym[1].name = f32(1)
+  o.sym[1].sectidx = f16(4)
+  o.sym[1].size = fofs(#s)
+  o.sym[1].info = 17
+  o.sect[2].type = f32(3) -- .shstrtab
+  o.sect[2].ofs = fofs(sofs)
+  o.sect[2].size = fofs(ofs)
+  o.sect[3].type = f32(3) -- .strtab
+  o.sect[3].ofs = fofs(sofs + ofs)
+  o.sect[3].size = fofs(#symname+1)
+  ffi.copy(o.space+ofs+1, symname)
+  ofs = ofs + #symname + 2
+  o.sect[4].type = f32(1) -- .rodata
+  o.sect[4].flags = fofs(2)
+  o.sect[4].ofs = fofs(sofs + ofs)
+  o.sect[4].size = fofs(#s)
+  o.sect[5].type = f32(1) -- .note.GNU-stack
+  o.sect[5].ofs = fofs(sofs + ofs + #s)
+
+  -- Write ELF object file.
+  local fp = savefile(output, "wb")
+  fp:write(ffi.string(o, ffi.sizeof(o)-4096+ofs))
+  bcsave_tail(fp, output, s)
+end
+
+local function bcsave_peobj(ctx, output, s, ffi)
+  ffi.cdef[[
+typedef struct {
+  uint16_t arch, nsects;
+  uint32_t time, symtabofs, nsyms;
+  uint16_t opthdrsz, flags;
+} PEheader;
+typedef struct {
+  char name[8];
+  uint32_t vsize, vaddr, size, ofs, relocofs, lineofs;
+  uint16_t nreloc, nline;
+  uint32_t flags;
+} PEsection;
+typedef struct __attribute((packed)) {
+  union {
+    char name[8];
+    uint32_t nameref[2];
+  };
+  uint32_t value;
+  int16_t sect;
+  uint16_t type;
+  uint8_t scl, naux;
+} PEsym;
+typedef struct __attribute((packed)) {
+  uint32_t size;
+  uint16_t nreloc, nline;
+  uint32_t cksum;
+  uint16_t assoc;
+  uint8_t comdatsel, unused[3];
+} PEsymaux;
+typedef struct {
+  PEheader hdr;
+  PEsection sect[2];
+  // Must be an even number of symbol structs.
+  PEsym sym0;
+  PEsymaux sym0aux;
+  PEsym sym1;
+  PEsymaux sym1aux;
+  PEsym sym2;
+  PEsym sym3;
+  uint32_t strtabsize;
+  uint8_t space[4096];
+} PEobj;
+]]
+  local symname = LJBC_PREFIX..ctx.modname
+  local is64 = false
+  if ctx.arch == "x86" then
+    symname = "_"..symname
+  elseif ctx.arch == "x64" then
+    is64 = true
+  end
+  local symexport = "   /EXPORT:"..symname..",DATA "
+
+  -- The file format is always little-endian. Swap if the host is big-endian.
+  local function f32(x) return x end
+  local f16 = f32
+  if ffi.abi("be") then
+    f32 = bit.bswap
+    function f16(x) return bit.rshift(bit.bswap(x), 16) end
+  end
+
+  -- Create PE object and fill in header.
+  local o = ffi.new("PEobj")
+  local hdr = o.hdr
+  hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch])
+  hdr.nsects = f16(2)
+  hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
+  hdr.nsyms = f32(6)
+
+  -- Fill in sections and symbols.
+  o.sect[0].name = ".drectve"
+  o.sect[0].size = f32(#symexport)
+  o.sect[0].flags = f32(0x00100a00)
+  o.sym0.sect = f16(1)
+  o.sym0.scl = 3
+  o.sym0.name = ".drectve"
+  o.sym0.naux = 1
+  o.sym0aux.size = f32(#symexport)
+  o.sect[1].name = ".rdata"
+  o.sect[1].size = f32(#s)
+  o.sect[1].flags = f32(0x40300040)
+  o.sym1.sect = f16(2)
+  o.sym1.scl = 3
+  o.sym1.name = ".rdata"
+  o.sym1.naux = 1
+  o.sym1aux.size = f32(#s)
+  o.sym2.sect = f16(2)
+  o.sym2.scl = 2
+  o.sym2.nameref[1] = f32(4)
+  o.sym3.sect = f16(-1)
+  o.sym3.scl = 2
+  o.sym3.value = f32(1)
+  o.sym3.name = "@feat.00" -- Mark as SafeSEH compliant.
+  ffi.copy(o.space, symname)
+  local ofs = #symname + 1
+  o.strtabsize = f32(ofs + 4)
+  o.sect[0].ofs = f32(ffi.offsetof(o, "space") + ofs)
+  ffi.copy(o.space + ofs, symexport)
+  ofs = ofs + #symexport
+  o.sect[1].ofs = f32(ffi.offsetof(o, "space") + ofs)
+
+  -- Write PE object file.
+  local fp = savefile(output, "wb")
+  fp:write(ffi.string(o, ffi.sizeof(o)-4096+ofs))
+  bcsave_tail(fp, output, s)
+end
+
+local function bcsave_machobj(ctx, output, s, ffi)
+  ffi.cdef[[
+typedef struct
+{
+  uint32_t magic, cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags;
+} mach_header;
+typedef struct
+{
+  mach_header; uint32_t reserved;
+} mach_header_64;
+typedef struct {
+  uint32_t cmd, cmdsize;
+  char segname[16];
+  uint32_t vmaddr, vmsize, fileoff, filesize;
+  uint32_t maxprot, initprot, nsects, flags;
+} mach_segment_command;
+typedef struct {
+  uint32_t cmd, cmdsize;
+  char segname[16];
+  uint64_t vmaddr, vmsize, fileoff, filesize;
+  uint32_t maxprot, initprot, nsects, flags;
+} mach_segment_command_64;
+typedef struct {
+  char sectname[16], segname[16];
+  uint32_t addr, size;
+  uint32_t offset, align, reloff, nreloc, flags;
+  uint32_t reserved1, reserved2;
+} mach_section;
+typedef struct {
+  char sectname[16], segname[16];
+  uint64_t addr, size;
+  uint32_t offset, align, reloff, nreloc, flags;
+  uint32_t reserved1, reserved2, reserved3;
+} mach_section_64;
+typedef struct {
+  uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize;
+} mach_symtab_command;
+typedef struct {
+  int32_t strx;
+  uint8_t type, sect;
+  int16_t desc;
+  uint32_t value;
+} mach_nlist;
+typedef struct {
+  uint32_t strx;
+  uint8_t type, sect;
+  uint16_t desc;
+  uint64_t value;
+} mach_nlist_64;
+typedef struct
+{
+  uint32_t magic, nfat_arch;
+} mach_fat_header;
+typedef struct
+{
+  uint32_t cputype, cpusubtype, offset, size, align;
+} mach_fat_arch;
+typedef struct {
+  struct {
+    mach_header hdr;
+    mach_segment_command seg;
+    mach_section sec;
+    mach_symtab_command sym;
+  } arch[1];
+  mach_nlist sym_entry;
+  uint8_t space[4096];
+} mach_obj;
+typedef struct {
+  struct {
+    mach_header_64 hdr;
+    mach_segment_command_64 seg;
+    mach_section_64 sec;
+    mach_symtab_command sym;
+  } arch[1];
+  mach_nlist_64 sym_entry;
+  uint8_t space[4096];
+} mach_obj_64;
+typedef struct {
+  mach_fat_header fat;
+  mach_fat_arch fat_arch[4];
+  struct {
+    mach_header hdr;
+    mach_segment_command seg;
+    mach_section sec;
+    mach_symtab_command sym;
+  } arch[4];
+  mach_nlist sym_entry;
+  uint8_t space[4096];
+} mach_fat_obj;
+]]
+  local symname = '_'..LJBC_PREFIX..ctx.modname
+  local isfat, is64, align, mobj = false, false, 4, "mach_obj"
+  if ctx.arch == "x64" then
+    is64, align, mobj = true, 8, "mach_obj_64"
+  elseif ctx.arch == "arm" then
+    isfat, mobj = true, "mach_fat_obj"
+  else
+    check(ctx.arch == "x86", "unsupported architecture for OSX")
+  end
+  local function aligned(v, a) return bit.band(v+a-1, -a) end
+  local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
+
+  -- Create Mach-O object and fill in header.
+  local o = ffi.new(mobj)
+  local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
+  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
+  local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+  if isfat then
+    o.fat.magic = be32(0xcafebabe)
+    o.fat.nfat_arch = be32(#cpusubtype)
+  end
+
+  -- Fill in sections and symbols.
+  for i=0,#cpusubtype-1 do
+    local ofs = 0
+    if isfat then
+      local a = o.fat_arch[i]
+      a.cputype = be32(cputype[i+1])
+      a.cpusubtype = be32(cpusubtype[i+1])
+      -- Subsequent slices overlap each other to share data.
+      ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0])
+      a.offset = be32(ofs)
+      a.size = be32(mach_size-ofs+#s)
+    end
+    local a = o.arch[i]
+    a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface
+    a.hdr.cputype = cputype[i+1]
+    a.hdr.cpusubtype = cpusubtype[i+1]
+    a.hdr.filetype = 1
+    a.hdr.ncmds = 2
+    a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym)
+    a.seg.cmd = is64 and 0x19 or 0x1
+    a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)
+    a.seg.vmsize = #s
+    a.seg.fileoff = mach_size-ofs
+    a.seg.filesize = #s
+    a.seg.maxprot = 1
+    a.seg.initprot = 1
+    a.seg.nsects = 1
+    ffi.copy(a.sec.sectname, "__data")
+    ffi.copy(a.sec.segname, "__DATA")
+    a.sec.size = #s
+    a.sec.offset = mach_size-ofs
+    a.sym.cmd = 2
+    a.sym.cmdsize = ffi.sizeof(a.sym)
+    a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
+    a.sym.nsyms = 1
+    a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
+    a.sym.strsize = aligned(#symname+2, align)
+  end
+  o.sym_entry.type = 0xf
+  o.sym_entry.sect = 1
+  o.sym_entry.strx = 1
+  ffi.copy(o.space+1, symname)
+
+  -- Write Macho-O object file.
+  local fp = savefile(output, "wb")
+  fp:write(ffi.string(o, mach_size))
+  bcsave_tail(fp, output, s)
+end
+
+local function bcsave_obj(ctx, output, s)
+  local ok, ffi = pcall(require, "ffi")
+  check(ok, "FFI library required to write this file type")
+  if ctx.os == "windows" then
+    return bcsave_peobj(ctx, output, s, ffi)
+  elseif ctx.os == "osx" then
+    return bcsave_machobj(ctx, output, s, ffi)
+  else
+    return bcsave_elfobj(ctx, output, s, ffi)
+  end
+end
+
+------------------------------------------------------------------------------
+
+local function bclist(input, output)
+  local f = readfile(input)
+  require("jit.bc").dump(f, savefile(output, "w"), true)
+end
+
+local function bcsave(ctx, input, output)
+  local f = readfile(input)
+  local s = string.dump(f, ctx.strip)
+  local t = ctx.type
+  if not t then
+    t = detecttype(output)
+    ctx.type = t
+  end
+  if t == "raw" then
+    bcsave_raw(output, s)
+  else
+    if not ctx.modname then ctx.modname = detectmodname(input) end
+    if t == "obj" then
+      bcsave_obj(ctx, output, s)
+    else
+      bcsave_c(ctx, output, s)
+    end
+  end
+end
+
+local function docmd(...)
+  local arg = {...}
+  local n = 1
+  local list = false
+  local ctx = {
+    strip = true, arch = jit.arch, os = string.lower(jit.os),
+    type = false, modname = false,
+  }
+  while n <= #arg do
+    local a = arg[n]
+    if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then
+      table.remove(arg, n)
+      if a == "--" then break end
+      for m=2,#a do
+	local opt = string.sub(a, m, m)
+	if opt == "l" then
+	  list = true
+	elseif opt == "s" then
+	  ctx.strip = true
+	elseif opt == "g" then
+	  ctx.strip = false
+	else
+	  if arg[n] == nil or m ~= #a then usage() end
+	  if opt == "e" then
+	    if n ~= 1 then usage() end
+	    arg[1] = check(loadstring(arg[1]))
+	  elseif opt == "n" then
+	    ctx.modname = checkmodname(table.remove(arg, n))
+	  elseif opt == "t" then
+	    ctx.type = checkarg(table.remove(arg, n), map_type, "file type")
+	  elseif opt == "a" then
+	    ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture")
+	  elseif opt == "o" then
+	    ctx.os = checkarg(table.remove(arg, n), map_os, "OS name")
+	  else
+	    usage()
+	  end
+	end
+      end
+    else
+      n = n + 1
+    end
+  end
+  if list then
+    if #arg == 0 or #arg > 2 then usage() end
+    bclist(arg[1], arg[2] or "-")
+  else
+    if #arg ~= 2 then usage() end
+    bcsave(ctx, arg[1], arg[2])
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Public module functions.
+module(...)
+
+start = docmd -- Process -b command line option.
+

+ 689 - 0
luajit.mod/luajit/src/jit/dis_arm.lua

@@ -0,0 +1,689 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM disassembler module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles most user-mode ARMv7 instructions
+-- NYI: Advanced SIMD and VFP instructions.
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local concat = table.concat
+local bit = require("bit")
+local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+------------------------------------------------------------------------------
+-- Opcode maps
+------------------------------------------------------------------------------
+
+local map_loadc = {
+  shift = 8, mask = 15,
+  [10] = {
+    shift = 20, mask = 1,
+    [0] = {
+      shift = 23, mask = 3,
+      [0] = "vmovFmDN", "vstmFNdr",
+      _ = {
+	shift = 21, mask = 1,
+	[0] = "vstrFdl",
+	{ shift = 16, mask = 15, [13] = "vpushFdr", _ = "vstmdbFNdr", }
+      },
+    },
+    {
+      shift = 23, mask = 3,
+      [0] = "vmovFDNm",
+      { shift = 16, mask = 15, [13] = "vpopFdr", _ = "vldmFNdr", },
+      _ = {
+	shift = 21, mask = 1,
+	[0] = "vldrFdl", "vldmdbFNdr",
+      },
+    },
+  },
+  [11] = {
+    shift = 20, mask = 1,
+    [0] = {
+      shift = 23, mask = 3,
+      [0] = "vmovGmDN", "vstmGNdr",
+      _ = {
+	shift = 21, mask = 1,
+	[0] = "vstrGdl",
+	{ shift = 16, mask = 15, [13] = "vpushGdr", _ = "vstmdbGNdr", }
+      },
+    },
+    {
+      shift = 23, mask = 3,
+      [0] = "vmovGDNm",
+      { shift = 16, mask = 15, [13] = "vpopGdr", _ = "vldmGNdr", },
+      _ = {
+	shift = 21, mask = 1,
+	[0] = "vldrGdl", "vldmdbGNdr",
+      },
+    },
+  },
+  _ = {
+    shift = 0, mask = 0 -- NYI ldc, mcrr, mrrc.
+  },
+}
+
+local map_vfps = {
+  shift = 6, mask = 0x2c001,
+  [0] = "vmlaF.dnm", "vmlsF.dnm",
+  [0x04000] = "vnmlsF.dnm", [0x04001] = "vnmlaF.dnm",
+  [0x08000] = "vmulF.dnm", [0x08001] = "vnmulF.dnm",
+  [0x0c000] = "vaddF.dnm", [0x0c001] = "vsubF.dnm",
+  [0x20000] = "vdivF.dnm",
+  [0x24000] = "vfnmsF.dnm", [0x24001] = "vfnmaF.dnm",
+  [0x28000] = "vfmaF.dnm", [0x28001] = "vfmsF.dnm",
+  [0x2c000] = "vmovF.dY",
+  [0x2c001] = {
+    shift = 7, mask = 0x1e01,
+    [0] = "vmovF.dm", "vabsF.dm",
+    [0x0200] = "vnegF.dm", [0x0201] = "vsqrtF.dm",
+    [0x0800] = "vcmpF.dm", [0x0801] = "vcmpeF.dm",
+    [0x0a00] = "vcmpzF.d", [0x0a01] = "vcmpzeF.d",
+    [0x0e01] = "vcvtG.dF.m",
+    [0x1000] = "vcvt.f32.u32Fdm", [0x1001] = "vcvt.f32.s32Fdm",
+    [0x1800] = "vcvtr.u32F.dm", [0x1801] = "vcvt.u32F.dm",
+    [0x1a00] = "vcvtr.s32F.dm", [0x1a01] = "vcvt.s32F.dm",
+  },
+}
+
+local map_vfpd = {
+  shift = 6, mask = 0x2c001,
+  [0] = "vmlaG.dnm", "vmlsG.dnm",
+  [0x04000] = "vnmlsG.dnm", [0x04001] = "vnmlaG.dnm",
+  [0x08000] = "vmulG.dnm", [0x08001] = "vnmulG.dnm",
+  [0x0c000] = "vaddG.dnm", [0x0c001] = "vsubG.dnm",
+  [0x20000] = "vdivG.dnm",
+  [0x24000] = "vfnmsG.dnm", [0x24001] = "vfnmaG.dnm",
+  [0x28000] = "vfmaG.dnm", [0x28001] = "vfmsG.dnm",
+  [0x2c000] = "vmovG.dY",
+  [0x2c001] = {
+    shift = 7, mask = 0x1e01,
+    [0] = "vmovG.dm", "vabsG.dm",
+    [0x0200] = "vnegG.dm", [0x0201] = "vsqrtG.dm",
+    [0x0800] = "vcmpG.dm", [0x0801] = "vcmpeG.dm",
+    [0x0a00] = "vcmpzG.d", [0x0a01] = "vcmpzeG.d",
+    [0x0e01] = "vcvtF.dG.m",
+    [0x1000] = "vcvt.f64.u32GdFm", [0x1001] = "vcvt.f64.s32GdFm",
+    [0x1800] = "vcvtr.u32FdG.m", [0x1801] = "vcvt.u32FdG.m",
+    [0x1a00] = "vcvtr.s32FdG.m", [0x1a01] = "vcvt.s32FdG.m",
+  },
+}
+
+local map_datac = {
+  shift = 24, mask = 1,
+  [0] = {
+    shift = 4, mask = 1,
+    [0] = {
+      shift = 8, mask = 15,
+      [10] = map_vfps,
+      [11] = map_vfpd,
+      -- NYI cdp, mcr, mrc.
+    },
+    {
+      shift = 8, mask = 15,
+      [10] = {
+	shift = 20, mask = 15,
+	[0] = "vmovFnD", "vmovFDn",
+	[14] = "vmsrD",
+	[15] = { shift = 12, mask = 15, [15] = "vmrs", _ = "vmrsD", },
+      },
+    },
+  },
+  "svcT",
+}
+
+local map_loadcu = {
+  shift = 0, mask = 0, -- NYI unconditional CP load/store.
+}
+
+local map_datacu = {
+  shift = 0, mask = 0, -- NYI unconditional CP data.
+}
+
+local map_simddata = {
+  shift = 0, mask = 0, -- NYI SIMD data.
+}
+
+local map_simdload = {
+  shift = 0, mask = 0, -- NYI SIMD load/store, preload.
+}
+
+local map_preload = {
+  shift = 0, mask = 0, -- NYI preload.
+}
+
+local map_media = {
+  shift = 20, mask = 31,
+  [0] = false,
+  { --01
+    shift = 5, mask = 7,
+    [0] = "sadd16DNM", "sasxDNM", "ssaxDNM", "ssub16DNM",
+    "sadd8DNM", false, false, "ssub8DNM",
+  },
+  { --02
+    shift = 5, mask = 7,
+    [0] = "qadd16DNM", "qasxDNM", "qsaxDNM", "qsub16DNM",
+    "qadd8DNM", false, false, "qsub8DNM",
+  },
+  { --03
+    shift = 5, mask = 7,
+    [0] = "shadd16DNM", "shasxDNM", "shsaxDNM", "shsub16DNM",
+    "shadd8DNM", false, false, "shsub8DNM",
+  },
+  false,
+  { --05
+    shift = 5, mask = 7,
+    [0] = "uadd16DNM", "uasxDNM", "usaxDNM", "usub16DNM",
+    "uadd8DNM", false, false, "usub8DNM",
+  },
+  { --06
+    shift = 5, mask = 7,
+    [0] = "uqadd16DNM", "uqasxDNM", "uqsaxDNM", "uqsub16DNM",
+    "uqadd8DNM", false, false, "uqsub8DNM",
+  },
+  { --07
+    shift = 5, mask = 7,
+    [0] = "uhadd16DNM", "uhasxDNM", "uhsaxDNM", "uhsub16DNM",
+    "uhadd8DNM", false, false, "uhsub8DNM",
+  },
+  { --08
+    shift = 5, mask = 7,
+    [0] = "pkhbtDNMU", false, "pkhtbDNMU",
+    { shift = 16, mask = 15, [15] = "sxtb16DMU", _ = "sxtab16DNMU", },
+    "pkhbtDNMU", "selDNM", "pkhtbDNMU",
+  },
+  false,
+  { --0a
+    shift = 5, mask = 7,
+    [0] = "ssatDxMu", "ssat16DxM", "ssatDxMu",
+    { shift = 16, mask = 15, [15] = "sxtbDMU", _ = "sxtabDNMU", },
+    "ssatDxMu", false, "ssatDxMu",
+  },
+  { --0b
+    shift = 5, mask = 7,
+    [0] = "ssatDxMu", "revDM", "ssatDxMu",
+    { shift = 16, mask = 15, [15] = "sxthDMU", _ = "sxtahDNMU", },
+    "ssatDxMu", "rev16DM", "ssatDxMu",
+  },
+  { --0c
+    shift = 5, mask = 7,
+    [3] = { shift = 16, mask = 15, [15] = "uxtb16DMU", _ = "uxtab16DNMU", },
+  },
+  false,
+  { --0e
+    shift = 5, mask = 7,
+    [0] = "usatDwMu", "usat16DwM", "usatDwMu",
+    { shift = 16, mask = 15, [15] = "uxtbDMU", _ = "uxtabDNMU", },
+    "usatDwMu", false, "usatDwMu",
+  },
+  { --0f
+    shift = 5, mask = 7,
+    [0] = "usatDwMu", "rbitDM", "usatDwMu",
+    { shift = 16, mask = 15, [15] = "uxthDMU", _ = "uxtahDNMU", },
+    "usatDwMu", "revshDM", "usatDwMu",
+  },
+  { --10
+    shift = 12, mask = 15,
+    [15] = {
+      shift = 5, mask = 7,
+      "smuadNMS", "smuadxNMS", "smusdNMS", "smusdxNMS",
+    },
+    _ = {
+      shift = 5, mask = 7,
+      [0] = "smladNMSD", "smladxNMSD", "smlsdNMSD", "smlsdxNMSD",
+    },
+  },
+  false, false, false,
+  { --14
+    shift = 5, mask = 7,
+    [0] = "smlaldDNMS", "smlaldxDNMS", "smlsldDNMS", "smlsldxDNMS",
+  },
+  { --15
+    shift = 5, mask = 7,
+    [0] = { shift = 12, mask = 15, [15] = "smmulNMS", _ = "smmlaNMSD", },
+    { shift = 12, mask = 15, [15] = "smmulrNMS", _ = "smmlarNMSD", },
+    false, false, false, false,
+    "smmlsNMSD", "smmlsrNMSD",
+  },
+  false, false,
+  { --18
+    shift = 5, mask = 7,
+    [0] = { shift = 12, mask = 15, [15] = "usad8NMS", _ = "usada8NMSD", },
+  },
+  false,
+  { --1a
+    shift = 5, mask = 3, [2] = "sbfxDMvw",
+  },
+  { --1b
+    shift = 5, mask = 3, [2] = "sbfxDMvw",
+  },
+  { --1c
+    shift = 5, mask = 3,
+    [0] = { shift = 0, mask = 15, [15] = "bfcDvX", _ = "bfiDMvX", },
+  },
+  { --1d
+    shift = 5, mask = 3,
+    [0] = { shift = 0, mask = 15, [15] = "bfcDvX", _ = "bfiDMvX", },
+  },
+  { --1e
+    shift = 5, mask = 3, [2] = "ubfxDMvw",
+  },
+  { --1f
+    shift = 5, mask = 3, [2] = "ubfxDMvw",
+  },
+}
+
+local map_load = {
+  shift = 21, mask = 9,
+  {
+    shift = 20, mask = 5,
+    [0] = "strtDL", "ldrtDL", [4] = "strbtDL", [5] = "ldrbtDL",
+  },
+  _ = {
+    shift = 20, mask = 5,
+    [0] = "strDL", "ldrDL", [4] = "strbDL", [5] = "ldrbDL",
+  }
+}
+
+local map_load1 = {
+  shift = 4, mask = 1,
+  [0] = map_load, map_media,
+}
+
+local map_loadm = {
+  shift = 20, mask = 1,
+  [0] = {
+    shift = 23, mask = 3,
+    [0] = "stmdaNR", "stmNR",
+    { shift = 16, mask = 63, [45] = "pushR", _ = "stmdbNR", }, "stmibNR",
+  },
+  {
+    shift = 23, mask = 3,
+    [0] = "ldmdaNR", { shift = 16, mask = 63, [61] = "popR", _ = "ldmNR", },
+    "ldmdbNR", "ldmibNR",
+  },
+}
+
+local map_data = {
+  shift = 21, mask = 15,
+  [0] = "andDNPs", "eorDNPs", "subDNPs", "rsbDNPs",
+  "addDNPs", "adcDNPs", "sbcDNPs", "rscDNPs",
+  "tstNP", "teqNP", "cmpNP", "cmnNP",
+  "orrDNPs", "movDPs", "bicDNPs", "mvnDPs",
+}
+
+local map_mul = {
+  shift = 21, mask = 7,
+  [0] = "mulNMSs", "mlaNMSDs", "umaalDNMS", "mlsDNMS",
+  "umullDNMSs", "umlalDNMSs", "smullDNMSs", "smlalDNMSs",
+}
+
+local map_sync = {
+  shift = 20, mask = 15, -- NYI: brackets around N. R(D+1) for ldrexd/strexd.
+  [0] = "swpDMN", false, false, false,
+  "swpbDMN", false, false, false,
+  "strexDMN", "ldrexDN", "strexdDN", "ldrexdDN",
+  "strexbDMN", "ldrexbDN", "strexhDN", "ldrexhDN",
+}
+
+local map_mulh = {
+  shift = 21, mask = 3,
+  [0] = { shift = 5, mask = 3,
+    [0] = "smlabbNMSD", "smlatbNMSD", "smlabtNMSD", "smlattNMSD", },
+  { shift = 5, mask = 3,
+    [0] = "smlawbNMSD", "smulwbNMS", "smlawtNMSD", "smulwtNMS", },
+  { shift = 5, mask = 3,
+    [0] = "smlalbbDNMS", "smlaltbDNMS", "smlalbtDNMS", "smlalttDNMS", },
+  { shift = 5, mask = 3,
+    [0] = "smulbbNMS", "smultbNMS", "smulbtNMS", "smulttNMS", },
+}
+
+local map_misc = {
+  shift = 4, mask = 7,
+  -- NYI: decode PSR bits of msr.
+  [0] = { shift = 21, mask = 1, [0] = "mrsD", "msrM", },
+  { shift = 21, mask = 3, "bxM", false, "clzDM", },
+  { shift = 21, mask = 3, "bxjM", },
+  { shift = 21, mask = 3, "blxM", },
+  false,
+  { shift = 21, mask = 3, [0] = "qaddDMN", "qsubDMN", "qdaddDMN", "qdsubDMN", },
+  false,
+  { shift = 21, mask = 3, "bkptK", },
+}
+
+local map_datar = {
+  shift = 4, mask = 9,
+  [9] = {
+    shift = 5, mask = 3,
+    [0] = { shift = 24, mask = 1, [0] = map_mul, map_sync, },
+    { shift = 20, mask = 1, [0] = "strhDL", "ldrhDL", },
+    { shift = 20, mask = 1, [0] = "ldrdDL", "ldrsbDL", },
+    { shift = 20, mask = 1, [0] = "strdDL", "ldrshDL", },
+  },
+  _ = {
+    shift = 20, mask = 25,
+    [16] = { shift = 7, mask = 1, [0] = map_misc, map_mulh, },
+    _ = {
+      shift = 0, mask = 0xffffffff,
+      [bor(0xe1a00000)] = "nop",
+      _ = map_data,
+    }
+  },
+}
+
+local map_datai = {
+  shift = 20, mask = 31, -- NYI: decode PSR bits of msr. Decode imm12.
+  [16] = "movwDW", [20] = "movtDW",
+  [18] = { shift = 0, mask = 0xf00ff, [0] = "nopv6", _ = "msrNW", },
+  [22] = "msrNW",
+  _ = map_data,
+}
+
+local map_branch = {
+  shift = 24, mask = 1,
+  [0] = "bB", "blB"
+}
+
+local map_condins = {
+  [0] = map_datar, map_datai, map_load, map_load1,
+  map_loadm, map_branch, map_loadc, map_datac
+}
+
+-- NYI: setend.
+local map_uncondins = {
+  [0] = false, map_simddata, map_simdload, map_preload,
+  false, "blxB", map_loadcu, map_datacu,
+}
+
+------------------------------------------------------------------------------
+
+local map_gpr = {
+  [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+  "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc",
+}
+
+local map_cond = {
+  [0] = "eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
+  "hi", "ls", "ge", "lt", "gt", "le", "al",
+}
+
+local map_shift = { [0] = "lsl", "lsr", "asr", "ror", }
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local extra = ""
+  if ctx.rel then
+    local sym = ctx.symtab[ctx.rel]
+    if sym then
+      extra = "\t->"..sym
+    elseif band(ctx.op, 0x0e000000) ~= 0x0a000000 then
+      extra = "\t; 0x"..tohex(ctx.rel)
+    end
+  end
+  if ctx.hexdump > 0 then
+    ctx.out(format("%08x  %s  %-5s %s%s\n",
+	    ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
+  else
+    ctx.out(format("%08x  %-5s %s%s\n",
+	    ctx.addr+pos, text, concat(operands, ", "), extra))
+  end
+  ctx.pos = pos + 4
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+-- Format operand 2 of load/store opcodes.
+local function fmtload(ctx, op, pos)
+  local base = map_gpr[band(rshift(op, 16), 15)]
+  local x, ofs
+  local ext = (band(op, 0x04000000) == 0)
+  if not ext and band(op, 0x02000000) == 0 then
+    ofs = band(op, 4095)
+    if band(op, 0x00800000) == 0 then ofs = -ofs end
+    if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+    ofs = "#"..ofs
+  elseif ext and band(op, 0x00400000) ~= 0 then
+    ofs = band(op, 15) + band(rshift(op, 4), 0xf0)
+    if band(op, 0x00800000) == 0 then ofs = -ofs end
+    if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+    ofs = "#"..ofs
+  else
+    ofs = map_gpr[band(op, 15)]
+    if ext or band(op, 0xfe0) == 0 then
+    elseif band(op, 0xfe0) == 0x60 then
+      ofs = format("%s, rrx", ofs)
+    else
+      local sh = band(rshift(op, 7), 31)
+      if sh == 0 then sh = 32 end
+      ofs = format("%s, %s #%d", ofs, map_shift[band(rshift(op, 5), 3)], sh)
+    end
+    if band(op, 0x00800000) == 0 then ofs = "-"..ofs end
+  end
+  if ofs == "#0" then
+    x = format("[%s]", base)
+  elseif band(op, 0x01000000) == 0 then
+    x = format("[%s], %s", base, ofs)
+  else
+    x = format("[%s, %s]", base, ofs)
+  end
+  if band(op, 0x01200000) == 0x01200000 then x = x.."!" end
+  return x
+end
+
+-- Format operand 2 of vector load/store opcodes.
+local function fmtvload(ctx, op, pos)
+  local base = map_gpr[band(rshift(op, 16), 15)]
+  local ofs = band(op, 255)*4
+  if band(op, 0x00800000) == 0 then ofs = -ofs end
+  if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+  if ofs == 0 then
+    return format("[%s]", base)
+  else
+    return format("[%s, #%d]", base, ofs)
+  end
+end
+
+local function fmtvr(op, vr, sh0, sh1)
+  if vr == "s" then
+    return format("s%d", 2*band(rshift(op, sh0), 15)+band(rshift(op, sh1), 1))
+  else
+    return format("d%d", band(rshift(op, sh0), 15)+band(rshift(op, sh1-4), 16))
+  end
+end
+
+-- Disassemble a single instruction.
+local function disass_ins(ctx)
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+  local operands = {}
+  local suffix = ""
+  local last, name, pat
+  local vr
+  ctx.op = op
+  ctx.rel = nil
+
+  local cond = rshift(op, 28)
+  local opat
+  if cond == 15 then
+    opat = map_uncondins[band(rshift(op, 25), 7)]
+  else
+    if cond ~= 14 then suffix = map_cond[cond] end
+    opat = map_condins[band(rshift(op, 25), 7)]
+  end
+  while type(opat) ~= "string" do
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+  end
+  name, pat = match(opat, "^([a-z0-9]*)(.*)")
+  if sub(pat, 1, 1) == "." then
+    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
+    suffix = suffix..s2
+    pat = p2
+  end
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      x = map_gpr[band(rshift(op, 12), 15)]
+    elseif p == "N" then
+      x = map_gpr[band(rshift(op, 16), 15)]
+    elseif p == "S" then
+      x = map_gpr[band(rshift(op, 8), 15)]
+    elseif p == "M" then
+      x = map_gpr[band(op, 15)]
+    elseif p == "d" then
+      x = fmtvr(op, vr, 12, 22)
+    elseif p == "n" then
+      x = fmtvr(op, vr, 16, 7)
+    elseif p == "m" then
+      x = fmtvr(op, vr, 0, 5)
+    elseif p == "P" then
+      if band(op, 0x02000000) ~= 0 then
+	x = ror(band(op, 255), 2*band(rshift(op, 8), 15))
+      else
+	x = map_gpr[band(op, 15)]
+	if band(op, 0xff0) ~= 0 then
+	  operands[#operands+1] = x
+	  local s = map_shift[band(rshift(op, 5), 3)]
+	  local r = nil
+	  if band(op, 0xf90) == 0 then
+	    if s == "ror" then s = "rrx" else r = "#32" end
+	  elseif band(op, 0x10) == 0 then
+	    r = "#"..band(rshift(op, 7), 31)
+	  else
+	    r = map_gpr[band(rshift(op, 8), 15)]
+	  end
+	  if name == "mov" then name = s; x = r
+	  elseif r then x = format("%s %s", s, r)
+	  else x = s end
+	end
+      end
+    elseif p == "L" then
+      x = fmtload(ctx, op, pos)
+    elseif p == "l" then
+      x = fmtvload(ctx, op, pos)
+    elseif p == "B" then
+      local addr = ctx.addr + pos + 8 + arshift(lshift(op, 8), 6)
+      if cond == 15 then addr = addr + band(rshift(op, 23), 2) end
+      ctx.rel = addr
+      x = "0x"..tohex(addr)
+    elseif p == "F" then
+      vr = "s"
+    elseif p == "G" then
+      vr = "d"
+    elseif p == "." then
+      suffix = suffix..(vr == "s" and ".f32" or ".f64")
+    elseif p == "R" then
+      if band(op, 0x00200000) ~= 0 and #operands == 1 then
+	operands[1] = operands[1].."!"
+      end
+      local t = {}
+      for i=0,15 do
+	if band(rshift(op, i), 1) == 1 then t[#t+1] = map_gpr[i] end
+      end
+      x = "{"..concat(t, ", ").."}"
+    elseif p == "r" then
+      if band(op, 0x00200000) ~= 0 and #operands == 2 then
+	operands[1] = operands[1].."!"
+      end
+      local s = tonumber(sub(last, 2))
+      local n = band(op, 255)
+      if vr == "d" then n = rshift(n, 1) end
+      operands[#operands] = format("{%s-%s%d}", last, vr, s+n-1)
+    elseif p == "W" then
+      x = band(op, 0x0fff) + band(rshift(op, 4), 0xf000)
+    elseif p == "T" then
+      x = "#0x"..tohex(band(op, 0x00ffffff), 6)
+    elseif p == "U" then
+      x = band(rshift(op, 7), 31)
+      if x == 0 then x = nil end
+    elseif p == "u" then
+      x = band(rshift(op, 7), 31)
+      if band(op, 0x40) == 0 then
+	if x == 0 then x = nil else x = "lsl #"..x end
+      else
+	if x == 0 then x = "asr #32" else x = "asr #"..x end
+      end
+    elseif p == "v" then
+      x = band(rshift(op, 7), 31)
+    elseif p == "w" then
+      x = band(rshift(op, 16), 31)
+    elseif p == "x" then
+      x = band(rshift(op, 16), 31) + 1
+    elseif p == "X" then
+      x = band(rshift(op, 16), 31) - last + 1
+    elseif p == "Y" then
+      x = band(rshift(op, 12), 0xf0) + band(op, 0x0f)
+    elseif p == "K" then
+      x = "#0x"..tohex(band(rshift(op, 4), 0x0000fff0) + band(op, 15), 4)
+    elseif p == "s" then
+      if band(op, 0x00100000) ~= 0 then suffix = "s"..suffix end
+    else
+      assert(false)
+    end
+    if x then
+      last = x
+      if type(x) == "number" then x = "#"..x end
+      operands[#operands+1] = x
+    end
+  end
+
+  return putop(ctx, name..suffix, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+local function disass_block(ctx, ofs, len)
+  if not ofs then ofs = 0 end
+  local stop = len and ofs+len or #ctx.code
+  ctx.pos = ofs
+  ctx.rel = nil
+  while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create_(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  ctx.addr = addr or 0
+  ctx.out = out or io.write
+  ctx.symtab = {}
+  ctx.disass = disass_block
+  ctx.hexdump = 8
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass_(code, addr, out)
+  create_(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname_(r)
+  if r < 16 then return map_gpr[r] end
+  return "d"..(r-16)
+end
+
+-- Public module functions.
+module(...)
+
+create = create_
+disass = disass_
+regname = regname_
+

+ 428 - 0
luajit.mod/luajit/src/jit/dis_mips.lua

@@ -0,0 +1,428 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS disassembler module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles all standard MIPS32R1/R2 instructions.
+-- Default mode is big-endian, but see: dis_mipsel.lua
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local concat = table.concat
+local bit = require("bit")
+local band, bor, tohex = bit.band, bit.bor, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+------------------------------------------------------------------------------
+-- Primary and extended opcode maps
+------------------------------------------------------------------------------
+
+local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
+local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
+local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
+
+local map_special = {
+  shift = 0, mask = 63,
+  [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
+  map_movci,	map_srl,	"sraDTA",
+  "sllvDTS",	false,		map_srlv,	"sravDTS",
+  "jrS",	"jalrD1S",	"movzDST",	"movnDST",
+  "syscallY",	"breakY",	false,		"sync",
+  "mfhiD",	"mthiS",	"mfloD",	"mtloS",
+  false,	false,		false,		false,
+  "multST",	"multuST",	"divST",	"divuST",
+  false,	false,		false,		false,
+  "addDST",	"addu|moveDST0", "subDST",	"subu|neguDS0T",
+  "andDST",	"orDST",	"xorDST",	"nor|notDST0",
+  false,	false,		"sltDST",	"sltuDST",
+  false,	false,		false,		false,
+  "tgeSTZ",	"tgeuSTZ",	"tltSTZ",	"tltuSTZ",
+  "teqSTZ",	false,		"tneSTZ",
+}
+
+local map_special2 = {
+  shift = 0, mask = 63,
+  [0] = "maddST", "madduST",	"mulDST",	false,
+  "msubST",	"msubuST",
+  [32] = "clzDS", [33] = "cloDS",
+  [63] = "sdbbpY",
+}
+
+local map_bshfl = {
+  shift = 6, mask = 31,
+  [2] = "wsbhDT",
+  [16] = "sebDT",
+  [24] = "sehDT",
+}
+
+local map_special3 = {
+  shift = 0, mask = 63,
+  [0] = "extTSAK", [4] = "insTSAL",
+  [32] = map_bshfl,
+  [59] = "rdhwrTD",
+}
+
+local map_regimm = {
+  shift = 16, mask = 31,
+  [0] = "bltzSB",	"bgezSB",	"bltzlSB",	"bgezlSB",
+  false,	false,		false,		false,
+  "tgeiSI",	"tgeiuSI",	"tltiSI",	"tltiuSI",
+  "teqiSI",	false,		"tneiSI",	false,
+  "bltzalSB",	"bgezalSB",	"bltzallSB",	"bgezallSB",
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  false,	false,		false,		"synciSO",
+}
+
+local map_cop0 = {
+  shift = 25, mask = 1,
+  [0] = {
+    shift = 21, mask = 15,
+    [0] = "mfc0TDW", [4] = "mtc0TDW",
+    [10] = "rdpgprDT",
+    [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
+    [14] = "wrpgprDT",
+  }, {
+    shift = 0, mask = 63,
+    [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
+    [24] = "eret", [31] = "deret",
+    [32] = "wait",
+  },
+}
+
+local map_cop1s = {
+  shift = 0, mask = 63,
+  [0] = "add.sFGH",	"sub.sFGH",	"mul.sFGH",	"div.sFGH",
+  "sqrt.sFG",		"abs.sFG",	"mov.sFG",	"neg.sFG",
+  "round.l.sFG",	"trunc.l.sFG",	"ceil.l.sFG",	"floor.l.sFG",
+  "round.w.sFG",	"trunc.w.sFG",	"ceil.w.sFG",	"floor.w.sFG",
+  false,
+  { shift = 16, mask = 1, [0] = "movf.sFGC", "movt.sFGC" },
+  "movz.sFGT",	"movn.sFGT",
+  false,	"recip.sFG",	"rsqrt.sFG",	false,
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  false,	"cvt.d.sFG",	false,		false,
+  "cvt.w.sFG",	"cvt.l.sFG",	"cvt.ps.sFGH",	false,
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  "c.f.sVGH",	"c.un.sVGH",	"c.eq.sVGH",	"c.ueq.sVGH",
+  "c.olt.sVGH",	"c.ult.sVGH",	"c.ole.sVGH",	"c.ule.sVGH",
+  "c.sf.sVGH",	"c.ngle.sVGH",	"c.seq.sVGH",	"c.ngl.sVGH",
+  "c.lt.sVGH",	"c.nge.sVGH",	"c.le.sVGH",	"c.ngt.sVGH",
+}
+
+local map_cop1d = {
+  shift = 0, mask = 63,
+  [0] = "add.dFGH",	"sub.dFGH",	"mul.dFGH",	"div.dFGH",
+  "sqrt.dFG",		"abs.dFG",	"mov.dFG",	"neg.dFG",
+  "round.l.dFG",	"trunc.l.dFG",	"ceil.l.dFG",	"floor.l.dFG",
+  "round.w.dFG",	"trunc.w.dFG",	"ceil.w.dFG",	"floor.w.dFG",
+  false,
+  { shift = 16, mask = 1, [0] = "movf.dFGC", "movt.dFGC" },
+  "movz.dFGT",	"movn.dFGT",
+  false,	"recip.dFG",	"rsqrt.dFG",	false,
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  "cvt.s.dFG",	false,		false,		false,
+  "cvt.w.dFG",	"cvt.l.dFG",	false,		false,
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  "c.f.dVGH",	"c.un.dVGH",	"c.eq.dVGH",	"c.ueq.dVGH",
+  "c.olt.dVGH",	"c.ult.dVGH",	"c.ole.dVGH",	"c.ule.dVGH",
+  "c.df.dVGH",	"c.ngle.dVGH",	"c.deq.dVGH",	"c.ngl.dVGH",
+  "c.lt.dVGH",	"c.nge.dVGH",	"c.le.dVGH",	"c.ngt.dVGH",
+}
+
+local map_cop1ps = {
+  shift = 0, mask = 63,
+  [0] = "add.psFGH",	"sub.psFGH",	"mul.psFGH",	false,
+  false,		"abs.psFG",	"mov.psFG",	"neg.psFG",
+  false,		false,		false,		false,
+  false,		false,		false,		false,
+  false,
+  { shift = 16, mask = 1, [0] = "movf.psFGC", "movt.psFGC" },
+  "movz.psFGT",	"movn.psFGT",
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  "cvt.s.puFG",	false,		false,		false,
+  false,	false,		false,		false,
+  "cvt.s.plFG",	false,		false,		false,
+  "pll.psFGH",	"plu.psFGH",	"pul.psFGH",	"puu.psFGH",
+  "c.f.psVGH",	"c.un.psVGH",	"c.eq.psVGH",	"c.ueq.psVGH",
+  "c.olt.psVGH", "c.ult.psVGH",	"c.ole.psVGH",	"c.ule.psVGH",
+  "c.psf.psVGH", "c.ngle.psVGH", "c.pseq.psVGH", "c.ngl.psVGH",
+  "c.lt.psVGH",	"c.nge.psVGH",	"c.le.psVGH",	"c.ngt.psVGH",
+}
+
+local map_cop1w = {
+  shift = 0, mask = 63,
+  [32] = "cvt.s.wFG", [33] = "cvt.d.wFG",
+}
+
+local map_cop1l = {
+  shift = 0, mask = 63,
+  [32] = "cvt.s.lFG", [33] = "cvt.d.lFG",
+}
+
+local map_cop1bc = {
+  shift = 16, mask = 3,
+  [0] = "bc1fCB", "bc1tCB",	"bc1flCB",	"bc1tlCB",
+}
+
+local map_cop1 = {
+  shift = 21, mask = 31,
+  [0] = "mfc1TG", false,	"cfc1TG",	"mfhc1TG",
+  "mtc1TG",	false,		"ctc1TG",	"mthc1TG",
+  map_cop1bc,	false,		false,		false,
+  false,	false,		false,		false,
+  map_cop1s,	map_cop1d,	false,		false,
+  map_cop1w,	map_cop1l,	map_cop1ps,
+}
+
+local map_cop1x = {
+  shift = 0, mask = 63,
+  [0] = "lwxc1FSX",	"ldxc1FSX",	false,		false,
+  false,	"luxc1FSX",	false,		false,
+  "swxc1FSX",	"sdxc1FSX",	false,		false,
+  false,	"suxc1FSX",	false,		"prefxMSX",
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  false,	false,		false,		false,
+  false,	false,		"alnv.psFGHS",	false,
+  "madd.sFRGH",	"madd.dFRGH",	false,		false,
+  false,	false,		"madd.psFRGH",	false,
+  "msub.sFRGH",	"msub.dFRGH",	false,		false,
+  false,	false,		"msub.psFRGH",	false,
+  "nmadd.sFRGH", "nmadd.dFRGH",	false,		false,
+  false,	false,		"nmadd.psFRGH",	false,
+  "nmsub.sFRGH", "nmsub.dFRGH",	false,		false,
+  false,	false,		"nmsub.psFRGH",	false,
+}
+
+local map_pri = {
+  [0] = map_special,	map_regimm,	"jJ",	"jalJ",
+  "beq|beqz|bST00B",	"bne|bnezST0B",		"blezSB",	"bgtzSB",
+  "addiTSI",	"addiu|liTS0I",	"sltiTSI",	"sltiuTSI",
+  "andiTSU",	"ori|liTS0U",	"xoriTSU",	"luiTU",
+  map_cop0,	map_cop1,	false,		map_cop1x,
+  "beql|beqzlST0B",	"bnel|bnezlST0B",	"blezlSB",	"bgtzlSB",
+  false,	false,		false,		false,
+  map_special2,	false,		false,		map_special3,
+  "lbTSO",	"lhTSO",	"lwlTSO",	"lwTSO",
+  "lbuTSO",	"lhuTSO",	"lwrTSO",	false,
+  "sbTSO",	"shTSO",	"swlTSO",	"swTSO",
+  false,	false,		"swrTSO",	"cacheNSO",
+  "llTSO",	"lwc1HSO",	"lwc2TSO",	"prefNSO",
+  false,	"ldc1HSO",	"ldc2TSO",	false,
+  "scTSO",	"swc1HSO",	"swc2TSO",	false,
+  false,	"sdc1HSO",	"sdc2TSO",	false,
+}
+
+------------------------------------------------------------------------------
+
+local map_gpr = {
+  [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+  "r24", "r25", "r26", "r27", "r28", "sp", "r30", "ra",
+}
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local extra = ""
+  if ctx.rel then
+    local sym = ctx.symtab[ctx.rel]
+    if sym then extra = "\t->"..sym end
+  end
+  if ctx.hexdump > 0 then
+    ctx.out(format("%08x  %s  %-7s %s%s\n",
+	    ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
+  else
+    ctx.out(format("%08x  %-7s %s%s\n",
+	    ctx.addr+pos, text, concat(operands, ", "), extra))
+  end
+  ctx.pos = pos + 4
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+local function get_be(ctx)
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  return bor(lshift(b0, 24), lshift(b1, 16), lshift(b2, 8), b3)
+end
+
+local function get_le(ctx)
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+end
+
+-- Disassemble a single instruction.
+local function disass_ins(ctx)
+  local op = ctx:get()
+  local operands = {}
+  local last = nil
+  ctx.op = op
+  ctx.rel = nil
+
+  local opat = map_pri[rshift(op, 26)]
+  while type(opat) ~= "string" do
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+  end
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  if altname then pat = pat2 end
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "S" then
+      x = map_gpr[band(rshift(op, 21), 31)]
+    elseif p == "T" then
+      x = map_gpr[band(rshift(op, 16), 31)]
+    elseif p == "D" then
+      x = map_gpr[band(rshift(op, 11), 31)]
+    elseif p == "F" then
+      x = "f"..band(rshift(op, 6), 31)
+    elseif p == "G" then
+      x = "f"..band(rshift(op, 11), 31)
+    elseif p == "H" then
+      x = "f"..band(rshift(op, 16), 31)
+    elseif p == "R" then
+      x = "f"..band(rshift(op, 21), 31)
+    elseif p == "A" then
+      x = band(rshift(op, 6), 31)
+    elseif p == "M" then
+      x = band(rshift(op, 11), 31)
+    elseif p == "N" then
+      x = band(rshift(op, 16), 31)
+    elseif p == "C" then
+      x = band(rshift(op, 18), 7)
+      if x == 0 then x = nil end
+    elseif p == "K" then
+      x = band(rshift(op, 11), 31) + 1
+    elseif p == "L" then
+      x = band(rshift(op, 11), 31) - last + 1
+    elseif p == "I" then
+      x = arshift(lshift(op, 16), 16)
+    elseif p == "U" then
+      x = band(op, 0xffff)
+    elseif p == "O" then
+      local disp = arshift(lshift(op, 16), 16)
+      operands[#operands] = format("%d(%s)", disp, last)
+    elseif p == "X" then
+      local index = map_gpr[band(rshift(op, 16), 31)]
+      operands[#operands] = format("%s(%s)", index, last)
+    elseif p == "B" then
+      x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
+      ctx.rel = x
+      x = "0x"..tohex(x)
+    elseif p == "J" then
+      x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4
+      ctx.rel = x
+      x = "0x"..tohex(x)
+    elseif p == "V" then
+      x = band(rshift(op, 8), 7)
+      if x == 0 then x = nil end
+    elseif p == "W" then
+      x = band(op, 7)
+      if x == 0 then x = nil end
+    elseif p == "Y" then
+      x = band(rshift(op, 6), 0x000fffff)
+      if x == 0 then x = nil end
+    elseif p == "Z" then
+      x = band(rshift(op, 6), 1023)
+      if x == 0 then x = nil end
+    elseif p == "0" then
+      if last == "r0" or last == 0 then
+	local n = #operands
+	operands[n] = nil
+	last = operands[n-1]
+	if altname then
+	  local a1, a2 = match(altname, "([^|]*)|(.*)")
+	  if a1 then name, altname = a1, a2
+	  else name = altname end
+	end
+      end
+    elseif p == "1" then
+      if last == "ra" then
+	operands[#operands] = nil
+      end
+    else
+      assert(false)
+    end
+    if x then operands[#operands+1] = x; last = x end
+  end
+
+  return putop(ctx, name, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+local function disass_block(ctx, ofs, len)
+  if not ofs then ofs = 0 end
+  local stop = len and ofs+len or #ctx.code
+  stop = stop - stop % 4
+  ctx.pos = ofs - ofs % 4
+  ctx.rel = nil
+  while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create_(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  ctx.addr = addr or 0
+  ctx.out = out or io.write
+  ctx.symtab = {}
+  ctx.disass = disass_block
+  ctx.hexdump = 8
+  ctx.get = get_be
+  return ctx
+end
+
+local function create_el_(code, addr, out)
+  local ctx = create_(code, addr, out)
+  ctx.get = get_le
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass_(code, addr, out)
+  create_(code, addr, out):disass()
+end
+
+local function disass_el_(code, addr, out)
+  create_el_(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname_(r)
+  if r < 32 then return map_gpr[r] end
+  return "f"..(r-32)
+end
+
+-- Public module functions.
+module(...)
+
+create = create_
+create_el = create_el_
+disass = disass_
+disass_el = disass_el_
+regname = regname_
+

+ 20 - 0
luajit.mod/luajit/src/jit/dis_mipsel.lua

@@ -0,0 +1,20 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPSEL disassembler wrapper module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the little-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local require = require
+
+module(...)
+
+local dis_mips = require(_PACKAGE.."dis_mips")
+
+create = dis_mips.create_el
+disass = dis_mips.disass_el
+regname = dis_mips.regname
+

+ 591 - 0
luajit.mod/luajit/src/jit/dis_ppc.lua

@@ -0,0 +1,591 @@
+----------------------------------------------------------------------------
+-- LuaJIT PPC disassembler module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles all common, non-privileged 32/64 bit PowerPC instructions
+-- plus the e500 SPE instructions and some Cell/Xenon extensions.
+--
+-- NYI: VMX, VMX128
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local concat = table.concat
+local bit = require("bit")
+local band, bor, tohex = bit.band, bit.bor, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+------------------------------------------------------------------------------
+-- Primary and extended opcode maps
+------------------------------------------------------------------------------
+
+local map_crops = {
+  shift = 1, mask = 1023,
+  [0] = "mcrfXX",
+  [33] = "crnor|crnotCCC=", [129] = "crandcCCC",
+  [193] = "crxor|crclrCCC%", [225] = "crnandCCC",
+  [257] = "crandCCC", [289] = "creqv|crsetCCC%",
+  [417] = "crorcCCC", [449] = "cror|crmoveCCC=",
+  [16] = "b_lrKB", [528] = "b_ctrKB",
+  [150] = "isync",
+}
+
+local map_rlwinm = setmetatable({
+  shift = 0, mask = -1,
+},
+{ __index = function(t, x)
+    local rot = band(rshift(x, 11), 31)
+    local mb = band(rshift(x, 6), 31)
+    local me = band(rshift(x, 1), 31)
+    if mb == 0 and me == 31-rot then
+      return "slwiRR~A."
+    elseif me == 31 and mb == 32-rot then
+      return "srwiRR~-A."
+    else
+      return "rlwinmRR~AAA."
+    end
+  end
+})
+
+local map_rld = {
+  shift = 2, mask = 7,
+  [0] = "rldiclRR~HM.", "rldicrRR~HM.", "rldicRR~HM.", "rldimiRR~HM.",
+  {
+    shift = 1, mask = 1,
+    [0] = "rldclRR~RM.", "rldcrRR~RM.",
+  },
+}
+
+local map_ext = setmetatable({
+  shift = 1, mask = 1023,
+
+  [0] = "cmp_YLRR", [32] = "cmpl_YLRR",
+  [4] = "twARR", [68] = "tdARR",
+
+  [8] = "subfcRRR.", [40] = "subfRRR.",
+  [104] = "negRR.", [136] = "subfeRRR.",
+  [200] = "subfzeRR.", [232] = "subfmeRR.",
+  [520] = "subfcoRRR.", [552] = "subfoRRR.",
+  [616] = "negoRR.", [648] = "subfeoRRR.",
+  [712] = "subfzeoRR.", [744] = "subfmeoRR.",
+
+  [9] = "mulhduRRR.", [73] = "mulhdRRR.", [233] = "mulldRRR.",
+  [457] = "divduRRR.", [489] = "divdRRR.",
+  [745] = "mulldoRRR.",
+  [969] = "divduoRRR.", [1001] = "divdoRRR.",
+
+  [10] = "addcRRR.", [138] = "addeRRR.",
+  [202] = "addzeRR.", [234] = "addmeRR.", [266] = "addRRR.",
+  [522] = "addcoRRR.", [650] = "addeoRRR.",
+  [714] = "addzeoRR.", [746] = "addmeoRR.", [778] = "addoRRR.",
+
+  [11] = "mulhwuRRR.", [75] = "mulhwRRR.", [235] = "mullwRRR.",
+  [459] = "divwuRRR.", [491] = "divwRRR.",
+  [747] = "mullwoRRR.",
+  [971] = "divwouRRR.", [1003] = "divwoRRR.",
+
+  [15] = "iselltRRR", [47] = "iselgtRRR", [79] = "iseleqRRR",
+
+  [144] = { shift = 20, mask = 1, [0] = "mtcrfRZ~", "mtocrfRZ~", },
+  [19] = { shift = 20, mask = 1, [0] = "mfcrR", "mfocrfRZ", },
+  [371] = { shift = 11, mask = 1023, [392] = "mftbR", [424] = "mftbuR", },
+  [339] = {
+    shift = 11, mask = 1023,
+    [32] = "mferR", [256] = "mflrR", [288] = "mfctrR", [16] = "mfspefscrR",
+  },
+  [467] = {
+    shift = 11, mask = 1023,
+    [32] = "mtxerR", [256] = "mtlrR", [288] = "mtctrR", [16] = "mtspefscrR",
+  },
+
+  [20] = "lwarxRR0R", [84] = "ldarxRR0R",
+
+  [21] = "ldxRR0R", [53] = "lduxRRR",
+  [149] = "stdxRR0R", [181] = "stduxRRR",
+  [341] = "lwaxRR0R", [373] = "lwauxRRR",
+
+  [23] = "lwzxRR0R", [55] = "lwzuxRRR",
+  [87] = "lbzxRR0R", [119] = "lbzuxRRR",
+  [151] = "stwxRR0R", [183] = "stwuxRRR",
+  [215] = "stbxRR0R", [247] = "stbuxRRR",
+  [279] = "lhzxRR0R", [311] = "lhzuxRRR",
+  [343] = "lhaxRR0R", [375] = "lhauxRRR",
+  [407] = "sthxRR0R", [439] = "sthuxRRR",
+
+  [54] = "dcbst-R0R", [86] = "dcbf-R0R",
+  [150] = "stwcxRR0R.", [214] = "stdcxRR0R.",
+  [246] = "dcbtst-R0R", [278] = "dcbt-R0R",
+  [310] = "eciwxRR0R", [438] = "ecowxRR0R",
+  [470] = "dcbi-RR",
+
+  [598] = {
+    shift = 21, mask = 3,
+    [0] = "sync", "lwsync", "ptesync",
+  },
+  [758] = "dcba-RR",
+  [854] = "eieio", [982] = "icbi-R0R", [1014] = "dcbz-R0R",
+
+  [26] = "cntlzwRR~", [58] = "cntlzdRR~",
+  [122] = "popcntbRR~",
+  [154] = "prtywRR~", [186] = "prtydRR~",
+
+  [28] = "andRR~R.", [60] = "andcRR~R.", [124] = "nor|notRR~R=.",
+  [284] = "eqvRR~R.", [316] = "xorRR~R.",
+  [412] = "orcRR~R.", [444] = "or|mrRR~R=.", [476] = "nandRR~R.",
+  [508] = "cmpbRR~R",
+
+  [512] = "mcrxrX",
+
+  [532] = "ldbrxRR0R", [660] = "stdbrxRR0R",
+
+  [533] = "lswxRR0R", [597] = "lswiRR0A",
+  [661] = "stswxRR0R", [725] = "stswiRR0A",
+
+  [534] = "lwbrxRR0R", [662] = "stwbrxRR0R",
+  [790] = "lhbrxRR0R", [918] = "sthbrxRR0R",
+
+  [535] = "lfsxFR0R", [567] = "lfsuxFRR",
+  [599] = "lfdxFR0R", [631] = "lfduxFRR",
+  [663] = "stfsxFR0R", [695] = "stfsuxFRR",
+  [727] = "stfdxFR0R", [759] = "stfduxFR0R",
+  [855] = "lfiwaxFR0R",
+  [983] = "stfiwxFR0R",
+
+  [24] = "slwRR~R.",
+
+  [27] = "sldRR~R.", [536] = "srwRR~R.",
+  [792] = "srawRR~R.", [824] = "srawiRR~A.",
+
+  [794] = "sradRR~R.", [826] = "sradiRR~H.", [827] = "sradiRR~H.",
+  [922] = "extshRR~.", [954] = "extsbRR~.", [986] = "extswRR~.",
+
+  [539] = "srdRR~R.",
+},
+{ __index = function(t, x)
+    if band(x, 31) == 15 then return "iselRRRC" end
+  end
+})
+
+local map_ld = {
+  shift = 0, mask = 3,
+  [0] = "ldRRE", "lduRRE", "lwaRRE",
+}
+
+local map_std = {
+  shift = 0, mask = 3,
+  [0] = "stdRRE", "stduRRE",
+}
+
+local map_fps = {
+  shift = 5, mask = 1,
+  {
+    shift = 1, mask = 15,
+    [0] = false, false, "fdivsFFF.", false,
+    "fsubsFFF.", "faddsFFF.", "fsqrtsF-F.", false,
+    "fresF-F.", "fmulsFF-F.", "frsqrtesF-F.", false,
+    "fmsubsFFFF~.", "fmaddsFFFF~.", "fnmsubsFFFF~.", "fnmaddsFFFF~.",
+  }
+}
+
+local map_fpd = {
+  shift = 5, mask = 1,
+  [0] = {
+    shift = 1, mask = 1023,
+    [0] = "fcmpuXFF", [32] = "fcmpoXFF", [64] = "mcrfsXX",
+    [38] = "mtfsb1A.", [70] = "mtfsb0A.", [134] = "mtfsfiA>>-A>",
+    [8] = "fcpsgnFFF.", [40] = "fnegF-F.", [72] = "fmrF-F.",
+    [136] = "fnabsF-F.", [264] = "fabsF-F.",
+    [12] = "frspF-F.",
+    [14] = "fctiwF-F.", [15] = "fctiwzF-F.",
+    [583] = "mffsF.", [711] = "mtfsfZF.",
+    [392] = "frinF-F.", [424] = "frizF-F.",
+    [456] = "fripF-F.", [488] = "frimF-F.",
+    [814] = "fctidF-F.", [815] = "fctidzF-F.", [846] = "fcfidF-F.",
+  },
+  {
+    shift = 1, mask = 15,
+    [0] = false, false, "fdivFFF.", false,
+    "fsubFFF.", "faddFFF.", "fsqrtF-F.", "fselFFFF~.",
+    "freF-F.", "fmulFF-F.", "frsqrteF-F.", false,
+    "fmsubFFFF~.", "fmaddFFFF~.", "fnmsubFFFF~.", "fnmaddFFFF~.",
+  }
+}
+
+local map_spe = {
+  shift = 0, mask = 2047,
+
+  [512] = "evaddwRRR", [514] = "evaddiwRAR~",
+  [516] = "evsubwRRR~", [518] = "evsubiwRAR~",
+  [520] = "evabsRR", [521] = "evnegRR",
+  [522] = "evextsbRR", [523] = "evextshRR", [524] = "evrndwRR",
+  [525] = "evcntlzwRR", [526] = "evcntlswRR",
+
+  [527] = "brincRRR",
+
+  [529] = "evandRRR", [530] = "evandcRRR", [534] = "evxorRRR",
+  [535] = "evor|evmrRRR=", [536] = "evnor|evnotRRR=",
+  [537] = "eveqvRRR", [539] = "evorcRRR", [542] = "evnandRRR",
+
+  [544] = "evsrwuRRR", [545] = "evsrwsRRR",
+  [546] = "evsrwiuRRA", [547] = "evsrwisRRA",
+  [548] = "evslwRRR", [550] = "evslwiRRA",
+  [552] = "evrlwRRR", [553] = "evsplatiRS",
+  [554] = "evrlwiRRA", [555] = "evsplatfiRS",
+  [556] = "evmergehiRRR", [557] = "evmergeloRRR",
+  [558] = "evmergehiloRRR", [559] = "evmergelohiRRR",
+
+  [560] = "evcmpgtuYRR", [561] = "evcmpgtsYRR",
+  [562] = "evcmpltuYRR", [563] = "evcmpltsYRR",
+  [564] = "evcmpeqYRR",
+
+  [632] = "evselRRR", [633] = "evselRRRW",
+  [634] = "evselRRRW", [635] = "evselRRRW",
+  [636] = "evselRRRW", [637] = "evselRRRW",
+  [638] = "evselRRRW", [639] = "evselRRRW",
+
+  [640] = "evfsaddRRR", [641] = "evfssubRRR",
+  [644] = "evfsabsRR", [645] = "evfsnabsRR", [646] = "evfsnegRR",
+  [648] = "evfsmulRRR", [649] = "evfsdivRRR",
+  [652] = "evfscmpgtYRR", [653] = "evfscmpltYRR", [654] = "evfscmpeqYRR",
+  [656] = "evfscfuiR-R", [657] = "evfscfsiR-R",
+  [658] = "evfscfufR-R", [659] = "evfscfsfR-R",
+  [660] = "evfsctuiR-R", [661] = "evfsctsiR-R",
+  [662] = "evfsctufR-R", [663] = "evfsctsfR-R",
+  [664] = "evfsctuizR-R", [666] = "evfsctsizR-R",
+  [668] = "evfststgtYRR", [669] = "evfststltYRR", [670] = "evfststeqYRR",
+
+  [704] = "efsaddRRR", [705] = "efssubRRR",
+  [708] = "efsabsRR", [709] = "efsnabsRR", [710] = "efsnegRR",
+  [712] = "efsmulRRR", [713] = "efsdivRRR",
+  [716] = "efscmpgtYRR", [717] = "efscmpltYRR", [718] = "efscmpeqYRR",
+  [719] = "efscfdR-R",
+  [720] = "efscfuiR-R", [721] = "efscfsiR-R",
+  [722] = "efscfufR-R", [723] = "efscfsfR-R",
+  [724] = "efsctuiR-R", [725] = "efsctsiR-R",
+  [726] = "efsctufR-R", [727] = "efsctsfR-R",
+  [728] = "efsctuizR-R", [730] = "efsctsizR-R",
+  [732] = "efststgtYRR", [733] = "efststltYRR", [734] = "efststeqYRR",
+
+  [736] = "efdaddRRR", [737] = "efdsubRRR",
+  [738] = "efdcfuidR-R", [739] = "efdcfsidR-R",
+  [740] = "efdabsRR", [741] = "efdnabsRR", [742] = "efdnegRR",
+  [744] = "efdmulRRR", [745] = "efddivRRR",
+  [746] = "efdctuidzR-R", [747] = "efdctsidzR-R",
+  [748] = "efdcmpgtYRR", [749] = "efdcmpltYRR", [750] = "efdcmpeqYRR",
+  [751] = "efdcfsR-R",
+  [752] = "efdcfuiR-R", [753] = "efdcfsiR-R",
+  [754] = "efdcfufR-R", [755] = "efdcfsfR-R",
+  [756] = "efdctuiR-R", [757] = "efdctsiR-R",
+  [758] = "efdctufR-R", [759] = "efdctsfR-R",
+  [760] = "efdctuizR-R", [762] = "efdctsizR-R",
+  [764] = "efdtstgtYRR", [765] = "efdtstltYRR", [766] = "efdtsteqYRR",
+
+  [768] = "evlddxRR0R", [769] = "evlddRR8",
+  [770] = "evldwxRR0R", [771] = "evldwRR8",
+  [772] = "evldhxRR0R", [773] = "evldhRR8",
+  [776] = "evlhhesplatxRR0R", [777] = "evlhhesplatRR2",
+  [780] = "evlhhousplatxRR0R", [781] = "evlhhousplatRR2",
+  [782] = "evlhhossplatxRR0R", [783] = "evlhhossplatRR2",
+  [784] = "evlwhexRR0R", [785] = "evlwheRR4",
+  [788] = "evlwhouxRR0R", [789] = "evlwhouRR4",
+  [790] = "evlwhosxRR0R", [791] = "evlwhosRR4",
+  [792] = "evlwwsplatxRR0R", [793] = "evlwwsplatRR4",
+  [796] = "evlwhsplatxRR0R", [797] = "evlwhsplatRR4",
+
+  [800] = "evstddxRR0R", [801] = "evstddRR8",
+  [802] = "evstdwxRR0R", [803] = "evstdwRR8",
+  [804] = "evstdhxRR0R", [805] = "evstdhRR8",
+  [816] = "evstwhexRR0R", [817] = "evstwheRR4",
+  [820] = "evstwhoxRR0R", [821] = "evstwhoRR4",
+  [824] = "evstwwexRR0R", [825] = "evstwweRR4",
+  [828] = "evstwwoxRR0R", [829] = "evstwwoRR4",
+
+  [1027] = "evmhessfRRR", [1031] = "evmhossfRRR", [1032] = "evmheumiRRR",
+  [1033] = "evmhesmiRRR", [1035] = "evmhesmfRRR", [1036] = "evmhoumiRRR",
+  [1037] = "evmhosmiRRR", [1039] = "evmhosmfRRR", [1059] = "evmhessfaRRR",
+  [1063] = "evmhossfaRRR", [1064] = "evmheumiaRRR", [1065] = "evmhesmiaRRR",
+  [1067] = "evmhesmfaRRR", [1068] = "evmhoumiaRRR", [1069] = "evmhosmiaRRR",
+  [1071] = "evmhosmfaRRR", [1095] = "evmwhssfRRR", [1096] = "evmwlumiRRR",
+  [1100] = "evmwhumiRRR", [1101] = "evmwhsmiRRR", [1103] = "evmwhsmfRRR",
+  [1107] = "evmwssfRRR", [1112] = "evmwumiRRR", [1113] = "evmwsmiRRR",
+  [1115] = "evmwsmfRRR", [1127] = "evmwhssfaRRR", [1128] = "evmwlumiaRRR",
+  [1132] = "evmwhumiaRRR", [1133] = "evmwhsmiaRRR", [1135] = "evmwhsmfaRRR",
+  [1139] = "evmwssfaRRR", [1144] = "evmwumiaRRR", [1145] = "evmwsmiaRRR",
+  [1147] = "evmwsmfaRRR",
+
+  [1216] = "evaddusiaawRR", [1217] = "evaddssiaawRR",
+  [1218] = "evsubfusiaawRR", [1219] = "evsubfssiaawRR",
+  [1220] = "evmraRR",
+  [1222] = "evdivwsRRR", [1223] = "evdivwuRRR",
+  [1224] = "evaddumiaawRR", [1225] = "evaddsmiaawRR",
+  [1226] = "evsubfumiaawRR", [1227] = "evsubfsmiaawRR",
+
+  [1280] = "evmheusiaawRRR", [1281] = "evmhessiaawRRR",
+  [1283] = "evmhessfaawRRR", [1284] = "evmhousiaawRRR",
+  [1285] = "evmhossiaawRRR", [1287] = "evmhossfaawRRR",
+  [1288] = "evmheumiaawRRR", [1289] = "evmhesmiaawRRR",
+  [1291] = "evmhesmfaawRRR", [1292] = "evmhoumiaawRRR",
+  [1293] = "evmhosmiaawRRR", [1295] = "evmhosmfaawRRR",
+  [1320] = "evmhegumiaaRRR", [1321] = "evmhegsmiaaRRR",
+  [1323] = "evmhegsmfaaRRR", [1324] = "evmhogumiaaRRR",
+  [1325] = "evmhogsmiaaRRR", [1327] = "evmhogsmfaaRRR",
+  [1344] = "evmwlusiaawRRR", [1345] = "evmwlssiaawRRR",
+  [1352] = "evmwlumiaawRRR", [1353] = "evmwlsmiaawRRR",
+  [1363] = "evmwssfaaRRR", [1368] = "evmwumiaaRRR",
+  [1369] = "evmwsmiaaRRR", [1371] = "evmwsmfaaRRR",
+  [1408] = "evmheusianwRRR", [1409] = "evmhessianwRRR",
+  [1411] = "evmhessfanwRRR", [1412] = "evmhousianwRRR",
+  [1413] = "evmhossianwRRR", [1415] = "evmhossfanwRRR",
+  [1416] = "evmheumianwRRR", [1417] = "evmhesmianwRRR",
+  [1419] = "evmhesmfanwRRR", [1420] = "evmhoumianwRRR",
+  [1421] = "evmhosmianwRRR", [1423] = "evmhosmfanwRRR",
+  [1448] = "evmhegumianRRR", [1449] = "evmhegsmianRRR",
+  [1451] = "evmhegsmfanRRR", [1452] = "evmhogumianRRR",
+  [1453] = "evmhogsmianRRR", [1455] = "evmhogsmfanRRR",
+  [1472] = "evmwlusianwRRR", [1473] = "evmwlssianwRRR",
+  [1480] = "evmwlumianwRRR", [1481] = "evmwlsmianwRRR",
+  [1491] = "evmwssfanRRR", [1496] = "evmwumianRRR",
+  [1497] = "evmwsmianRRR", [1499] = "evmwsmfanRRR",
+}
+
+local map_pri = {
+  [0] = false,	false,		"tdiARI",	"twiARI",
+  map_spe,	false,		false,		"mulliRRI",
+  "subficRRI",	false,		"cmpl_iYLRU",	"cmp_iYLRI",
+  "addicRRI",	"addic.RRI",	"addi|liRR0I",	"addis|lisRR0I",
+  "b_KBJ",	"sc",		 "bKJ",		map_crops,
+  "rlwimiRR~AAA.", map_rlwinm,	false,		"rlwnmRR~RAA.",
+  "oriNRR~U",	"orisRR~U",	"xoriRR~U",	"xorisRR~U",
+  "andi.RR~U",	"andis.RR~U",	map_rld,	map_ext,
+  "lwzRRD",	"lwzuRRD",	"lbzRRD",	"lbzuRRD",
+  "stwRRD",	"stwuRRD",	"stbRRD",	"stbuRRD",
+  "lhzRRD",	"lhzuRRD",	"lhaRRD",	"lhauRRD",
+  "sthRRD",	"sthuRRD",	"lmwRRD",	"stmwRRD",
+  "lfsFRD",	"lfsuFRD",	"lfdFRD",	"lfduFRD",
+  "stfsFRD",	"stfsuFRD",	"stfdFRD",	"stfduFRD",
+  false,	false,		map_ld,		map_fps,
+  false,	false,		map_std,	map_fpd,
+}
+
+------------------------------------------------------------------------------
+
+local map_gpr = {
+  [0] = "r0", "sp", "r2", "r3", "r4", "r5", "r6", "r7",
+  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+  "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+}
+
+local map_cond = { [0] = "lt", "gt", "eq", "so", "ge", "le", "ne", "ns", }
+
+-- Format a condition bit.
+local function condfmt(cond)
+  if cond <= 3 then
+    return map_cond[band(cond, 3)]
+  else
+    return format("4*cr%d+%s", rshift(cond, 2), map_cond[band(cond, 3)])
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local extra = ""
+  if ctx.rel then
+    local sym = ctx.symtab[ctx.rel]
+    if sym then extra = "\t->"..sym end
+  end
+  if ctx.hexdump > 0 then
+    ctx.out(format("%08x  %s  %-7s %s%s\n",
+	    ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
+  else
+    ctx.out(format("%08x  %-7s %s%s\n",
+	    ctx.addr+pos, text, concat(operands, ", "), extra))
+  end
+  ctx.pos = pos + 4
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+-- Disassemble a single instruction.
+local function disass_ins(ctx)
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  local op = bor(lshift(b0, 24), lshift(b1, 16), lshift(b2, 8), b3)
+  local operands = {}
+  local last = nil
+  local rs = 21
+  ctx.op = op
+  ctx.rel = nil
+
+  local opat = map_pri[rshift(b0, 2)]
+  while type(opat) ~= "string" do
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)]
+  end
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.]*)(.*)")
+  if altname then pat = pat2 end
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "R" then
+      x = map_gpr[band(rshift(op, rs), 31)]
+      rs = rs - 5
+    elseif p == "F" then
+      x = "f"..band(rshift(op, rs), 31)
+      rs = rs - 5
+    elseif p == "A" then
+      x = band(rshift(op, rs), 31)
+      rs = rs - 5
+    elseif p == "S" then
+      x = arshift(lshift(op, 27-rs), 27)
+      rs = rs - 5
+    elseif p == "I" then
+      x = arshift(lshift(op, 16), 16)
+    elseif p == "U" then
+      x = band(op, 0xffff)
+    elseif p == "D" or p == "E" then
+      local disp = arshift(lshift(op, 16), 16)
+      if p == "E" then disp = band(disp, -4) end
+      if last == "r0" then last = "0" end
+      operands[#operands] = format("%d(%s)", disp, last)
+    elseif p >= "2" and p <= "8" then
+      local disp = band(rshift(op, rs), 31) * p
+      if last == "r0" then last = "0" end
+      operands[#operands] = format("%d(%s)", disp, last)
+    elseif p == "H" then
+      x = band(rshift(op, rs), 31) + lshift(band(op, 2), 4)
+      rs = rs - 5
+    elseif p == "M" then
+      x = band(rshift(op, rs), 31) + band(op, 0x20)
+    elseif p == "C" then
+      x = condfmt(band(rshift(op, rs), 31))
+      rs = rs - 5
+    elseif p == "B" then
+      local bo = rshift(op, 21)
+      local cond = band(rshift(op, 16), 31)
+      local cn = ""
+      rs = rs - 10
+      if band(bo, 4) == 0 then
+	cn = band(bo, 2) == 0 and "dnz" or "dz"
+	if band(bo, 0x10) == 0 then
+	  cn = cn..(band(bo, 8) == 0 and "f" or "t")
+	end
+	if band(bo, 0x10) == 0 then x = condfmt(cond) end
+	name = name..(band(bo, 1) == band(rshift(op, 15), 1) and "-" or "+")
+      elseif band(bo, 0x10) == 0 then
+	cn = map_cond[band(cond, 3) + (band(bo, 8) == 0 and 4 or 0)]
+	if cond > 3 then x = "cr"..rshift(cond, 2) end
+	name = name..(band(bo, 1) == band(rshift(op, 15), 1) and "-" or "+")
+      end
+      name = gsub(name, "_", cn)
+    elseif p == "J" then
+      x = arshift(lshift(op, 27-rs), 29-rs)*4
+      if band(op, 2) == 0 then x = ctx.addr + pos + x end
+      ctx.rel = x
+      x = "0x"..tohex(x)
+    elseif p == "K" then
+      if band(op, 1) ~= 0 then name = name.."l" end
+      if band(op, 2) ~= 0 then name = name.."a" end
+    elseif p == "X" or p == "Y" then
+      x = band(rshift(op, rs+2), 7)
+      if x == 0 and p == "Y" then x = nil else x = "cr"..x end
+      rs = rs - 5
+    elseif p == "W" then
+      x = "cr"..band(op, 7)
+    elseif p == "Z" then
+      x = band(rshift(op, rs-4), 255)
+      rs = rs - 10
+    elseif p == ">" then
+      operands[#operands] = rshift(operands[#operands], 1)
+    elseif p == "0" then
+      if last == "r0" then
+	operands[#operands] = nil
+	if altname then name = altname end
+      end
+    elseif p == "L" then
+      name = gsub(name, "_", band(op, 0x00200000) ~= 0 and "d" or "w")
+    elseif p == "." then
+      if band(op, 1) == 1 then name = name.."." end
+    elseif p == "N" then
+      if op == 0x60000000 then name = "nop"; break end
+    elseif p == "~" then
+      local n = #operands
+      operands[n-1],  operands[n] = operands[n], operands[n-1]
+    elseif p == "=" then
+      local n = #operands
+      if last == operands[n-1] then
+	operands[n] = nil
+	name = altname
+      end
+    elseif p == "%" then
+      local n = #operands
+      if last == operands[n-1] and last == operands[n-2] then
+	operands[n] = nil
+	operands[n-1] = nil
+	name = altname
+      end
+    elseif p == "-" then
+      rs = rs - 5
+    else
+      assert(false)
+    end
+    if x then operands[#operands+1] = x; last = x end
+  end
+
+  return putop(ctx, name, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+local function disass_block(ctx, ofs, len)
+  if not ofs then ofs = 0 end
+  local stop = len and ofs+len or #ctx.code
+  stop = stop - stop % 4
+  ctx.pos = ofs - ofs % 4
+  ctx.rel = nil
+  while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create_(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  ctx.addr = addr or 0
+  ctx.out = out or io.write
+  ctx.symtab = {}
+  ctx.disass = disass_block
+  ctx.hexdump = 8
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass_(code, addr, out)
+  create_(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname_(r)
+  if r < 32 then return map_gpr[r] end
+  return "f"..(r-32)
+end
+
+-- Public module functions.
+module(...)
+
+create = create_
+disass = disass_
+regname = regname_
+

+ 20 - 0
luajit.mod/luajit/src/jit/dis_x64.lua

@@ -0,0 +1,20 @@
+----------------------------------------------------------------------------
+-- LuaJIT x64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the 64 bit functions from the combined
+-- x86/x64 disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local require = require
+
+module(...)
+
+local dis_x86 = require(_PACKAGE.."dis_x86")
+
+create = dis_x86.create64
+disass = dis_x86.disass64
+regname = dis_x86.regname64
+

+ 836 - 0
luajit.mod/luajit/src/jit/dis_x86.lua

@@ -0,0 +1,836 @@
+----------------------------------------------------------------------------
+-- LuaJIT x86/x64 disassembler module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- Sending small code snippets to an external disassembler and mixing the
+-- output with our own stuff was too fragile. So I had to bite the bullet
+-- and write yet another x86 disassembler. Oh well ...
+--
+-- The output format is very similar to what ndisasm generates. But it has
+-- been developed independently by looking at the opcode tables from the
+-- Intel and AMD manuals. The supported instruction set is quite extensive
+-- and reflects what a current generation Intel or AMD CPU implements in
+-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
+-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
+-- instructions.
+--
+-- Notes:
+-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
+-- * No attempt at optimization has been made -- it's fast enough for my needs.
+-- * The public API may change when more architectures are added.
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local lower, rep = string.lower, string.rep
+
+-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
+local map_opc1_32 = {
+--0x
+[0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
+"orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
+--1x
+"adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
+"sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
+--2x
+"andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
+"subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
+--3x
+"xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
+"cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
+--4x
+"incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
+"decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
+--5x
+"pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
+"popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
+--6x
+"sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
+"fs:seg","gs:seg","o16:","a16",
+"pushUi","imulVrmi","pushBs","imulVrms",
+"insb","insVS","outsb","outsVS",
+--7x
+"joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
+"jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
+--8x
+"arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
+"testBmr","testVmr","xchgBrm","xchgVrm",
+"movBmr","movVmr","movBrm","movVrm",
+"movVmg","leaVrm","movWgm","popUm",
+--9x
+"nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
+"xchgVaR","xchgVaR","xchgVaR","xchgVaR",
+"sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
+"sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
+--Ax
+"movBao","movVao","movBoa","movVoa",
+"movsb","movsVS","cmpsb","cmpsVS",
+"testBai","testVai","stosb","stosVS",
+"lodsb","lodsVS","scasb","scasVS",
+--Bx
+"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
+"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
+--Cx
+"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
+--Dx
+"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
+"fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
+--Ex
+"loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
+"inBau","inVau","outBua","outVua",
+"callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
+--Fx
+"lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
+"clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
+}
+assert(#map_opc1_32 == 255)
+
+-- Map for 1st opcode byte in 64 bit mode (overrides only).
+local map_opc1_64 = setmetatable({
+  [0x06]=false, [0x07]=false, [0x0e]=false,
+  [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false,
+  [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false,
+  [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:",
+  [0x40]="rex*",   [0x41]="rex*b",   [0x42]="rex*x",   [0x43]="rex*xb",
+  [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
+  [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
+  [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
+  [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+  [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
+}, { __index = map_opc1_32 })
+
+-- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
+-- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2
+local map_opc2 = {
+--0x
+[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
+"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
+--1x
+"movupsXrm|movssXrm|movupdXrm|movsdXrm",
+"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
+"movlpsXmr||movlpdXmr",
+"unpcklpsXrm||unpcklpdXrm",
+"unpckhpsXrm||unpckhpdXrm",
+"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
+"movhpsXmr||movhpdXmr",
+"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
+"hintnopVm","hintnopVm","hintnopVm","hintnopVm",
+--2x
+"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
+"movapsXrm||movapdXrm",
+"movapsXmr||movapdXmr",
+"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
+"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
+"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
+"ucomissXrm||ucomisdXrm",
+"comissXrm||comisdXrm",
+--3x
+"wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
+"opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
+--4x
+"cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
+"cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
+"cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
+"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
+--5x
+"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
+"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
+"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
+"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
+"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
+"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
+"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
+"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+--6x
+"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
+"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
+"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
+"||punpcklqdqXrm","||punpckhqdqXrm",
+"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
+--7x
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
+"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
+"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
+nil,nil,
+"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
+--8x
+"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
+"jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
+--9x
+"setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
+"setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
+--Ax
+"push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
+"push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
+--Bx
+"cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
+"$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
+"|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
+"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
+--Cx
+"xaddBmr","xaddVmr",
+"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
+"pinsrwPrWmu","pextrwDrPmu",
+"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
+--Dx
+"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
+"paddqPrm","pmullwPrm",
+"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
+"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
+"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+--Ex
+"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
+"pmulhuwPrm","pmulhwPrm",
+"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
+"psubsbPrm","psubswPrm","pminswPrm","porPrm",
+"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+--Fx
+"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
+"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
+"paddbPrm","paddwPrm","padddPrm","ud",
+}
+assert(map_opc2[255] == "ud")
+
+-- Map for three-byte opcodes. Can't wait for their next invention.
+local map_opc3 = {
+["38"] = { -- [66] 0f 38 xx
+--0x
+[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
+"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
+"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
+nil,nil,nil,nil,
+--1x
+"||pblendvbXrma",nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
+nil,nil,nil,nil,
+"pabsbPrm","pabswPrm","pabsdPrm",nil,
+--2x
+"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
+"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
+"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
+nil,nil,nil,nil,
+--3x
+"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
+"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
+"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+--4x
+"||pmulddXrm","||phminposuwXrm",
+--Fx
+[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
+},
+
+["3a"] = { -- [66] 0f 3a xx
+--0x
+[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
+"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+--1x
+nil,nil,nil,nil,
+"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
+nil,nil,nil,nil,nil,nil,nil,nil,
+--2x
+"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+--4x
+[0x40] = "||dppsXrmu",
+[0x41] = "||dppdXrmu",
+[0x42] = "||mpsadbwXrmu",
+--6x
+[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
+[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+},
+}
+
+-- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands).
+local map_opcvm = {
+[0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff",
+[0xc8]="monitor",[0xc9]="mwait",
+[0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave",
+[0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga",
+[0xf8]="swapgs",[0xf9]="rdtscp",
+}
+
+-- Map for FP opcodes. And you thought stack machines are simple?
+local map_opcfp = {
+-- D8-DF 00-BF: opcodes with a memory operand.
+-- D8
+[0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
+"fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
+-- DA
+"fiaddDm","fimulDm","ficomDm","ficompDm",
+"fisubDm","fisubrDm","fidivDm","fidivrDm",
+-- DB
+"fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
+-- DC
+"faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
+-- DD
+"fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
+-- DE
+"fiaddWm","fimulWm","ficomWm","ficompWm",
+"fisubWm","fisubrWm","fidivWm","fidivrWm",
+-- DF
+"fildWm","fisttpWm","fistWm","fistpWm",
+"fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
+-- xx C0-FF: opcodes with a pseudo-register operand.
+-- D8
+"faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
+-- D9
+"fldFf","fxchFf",{"fnop"},nil,
+{"fchs","fabs",nil,nil,"ftst","fxam"},
+{"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
+{"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
+{"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
+-- DA
+"fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
+-- DB
+"fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
+{nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
+-- DC
+"fadd toFf","fmul toFf",nil,nil,
+"fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
+-- DD
+"ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
+-- DE
+"faddpFf","fmulpFf",nil,{nil,"fcompp"},
+"fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
+-- DF
+nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
+}
+assert(map_opcfp[126] == "fcomipFf")
+
+-- Map for opcode groups. The subkey is sp from the ModRM byte.
+local map_opcgroup = {
+  arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
+  shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
+  testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
+  testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
+  incb = { "inc", "dec" },
+  incd = { "inc", "dec", "callUmp", "$call farDmp",
+	   "jmpUmp", "$jmp farDmp", "pushUm" },
+  sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
+  sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
+	   "smsw", nil, "lmsw", "vm*$invlpg" },
+  bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
+  cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
+	      nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
+  pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
+  pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
+  pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" },
+  pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
+  fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
+	     nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
+  prefetch = { "prefetch", "prefetchw" },
+  prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
+}
+
+------------------------------------------------------------------------------
+
+-- Maps for register names.
+local map_regs = {
+  B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
+	"r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
+  B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
+	  "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
+  W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
+	"r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
+  D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+	"r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
+  Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
+	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
+  M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
+  X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+}
+local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
+
+-- Maps for size names.
+local map_sz2n = {
+  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+}
+local map_sz2prefix = {
+  B = "byte", W = "word", D = "dword",
+  Q = "qword",
+  M = "qword", X = "xword",
+  F = "dword", G = "qword", -- No need for sizes/register names for these two.
+}
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+local function putop(ctx, text, operands)
+  local code, pos, hex = ctx.code, ctx.pos, ""
+  local hmax = ctx.hexdump
+  if hmax > 0 then
+    for i=ctx.start,pos-1 do
+      hex = hex..format("%02X", byte(code, i, i))
+    end
+    if #hex > hmax then hex = sub(hex, 1, hmax)..". "
+    else hex = hex..rep(" ", hmax-#hex+2) end
+  end
+  if operands then text = text.." "..operands end
+  if ctx.o16 then text = "o16 "..text; ctx.o16 = false end
+  if ctx.a32 then text = "a32 "..text; ctx.a32 = false end
+  if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
+  if ctx.rex then
+    local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
+	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
+    if t ~= "" then text = "rex."..t.." "..text end
+    ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
+    ctx.rex = false
+  end
+  if ctx.seg then
+    local text2, n = gsub(text, "%[", "["..ctx.seg..":")
+    if n == 0 then text = ctx.seg.." "..text else text = text2 end
+    ctx.seg = false
+  end
+  if ctx.lock then text = "lock "..text; ctx.lock = false end
+  local imm = ctx.imm
+  if imm then
+    local sym = ctx.symtab[imm]
+    if sym then text = text.."\t->"..sym end
+  end
+  ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
+  ctx.mrm = false
+  ctx.start = pos
+  ctx.imm = nil
+end
+
+-- Clear all prefix flags.
+local function clearprefixes(ctx)
+  ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
+  ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
+  ctx.rex = false; ctx.a32 = false
+end
+
+-- Fallback for incomplete opcodes at the end.
+local function incomplete(ctx)
+  ctx.pos = ctx.stop+1
+  clearprefixes(ctx)
+  return putop(ctx, "(incomplete)")
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+  clearprefixes(ctx)
+  return putop(ctx, "(unknown)")
+end
+
+-- Return an immediate of the specified size.
+local function getimm(ctx, pos, n)
+  if pos+n-1 > ctx.stop then return incomplete(ctx) end
+  local code = ctx.code
+  if n == 1 then
+    local b1 = byte(code, pos, pos)
+    return b1
+  elseif n == 2 then
+    local b1, b2 = byte(code, pos, pos+1)
+    return b1+b2*256
+  else
+    local b1, b2, b3, b4 = byte(code, pos, pos+3)
+    local imm = b1+b2*256+b3*65536+b4*16777216
+    ctx.imm = imm
+    return imm
+  end
+end
+
+-- Process pattern string and generate the operands.
+local function putpat(ctx, name, pat)
+  local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
+  local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+
+  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "V" or p == "U" then
+      if ctx.rexw then sz = "Q"; ctx.rexw = false
+      elseif ctx.o16 then sz = "W"; ctx.o16 = false
+      elseif p == "U" and ctx.x64 then sz = "Q"
+      else sz = "D" end
+      regs = map_regs[sz]
+    elseif p == "T" then
+      if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end
+      regs = map_regs[sz]
+    elseif p == "B" then
+      sz = "B"
+      regs = ctx.rex and map_regs.B64 or map_regs.B
+    elseif match(p, "[WDQMXFG]") then
+      sz = p
+      regs = map_regs[sz]
+    elseif p == "P" then
+      sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+      regs = map_regs[sz]
+    elseif p == "S" then
+      name = name..lower(sz)
+    elseif p == "s" then
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = imm <= 127 and format("+0x%02x", imm)
+		     or format("-0x%02x", 256-imm)
+      pos = pos+1
+    elseif p == "u" then
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = format("0x%02x", imm)
+      pos = pos+1
+    elseif p == "w" then
+      local imm = getimm(ctx, pos, 2); if not imm then return end
+      x = format("0x%x", imm)
+      pos = pos+2
+    elseif p == "o" then -- [offset]
+      if ctx.x64 then
+	local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
+	local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
+	x = format("[0x%08x%08x]", imm2, imm1)
+	pos = pos+8
+      else
+	local imm = getimm(ctx, pos, 4); if not imm then return end
+	x = format("[0x%08x]", imm)
+	pos = pos+4
+      end
+    elseif p == "i" or p == "I" then
+      local n = map_sz2n[sz]
+      if n == 8 and ctx.x64 and p == "I" then
+	local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
+	local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
+	x = format("0x%08x%08x", imm2, imm1)
+      else
+	if n == 8 then n = 4 end
+	local imm = getimm(ctx, pos, n); if not imm then return end
+	if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then
+	  imm = (0xffffffff+1)-imm
+	  x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm)
+	else
+	  x = format(imm > 65535 and "0x%08x" or "0x%x", imm)
+	end
+      end
+      pos = pos+n
+    elseif p == "j" then
+      local n = map_sz2n[sz]
+      if n == 8 then n = 4 end
+      local imm = getimm(ctx, pos, n); if not imm then return end
+      if sz == "B" and imm > 127 then imm = imm-256
+      elseif imm > 2147483647 then imm = imm-4294967296 end
+      pos = pos+n
+      imm = imm + pos + ctx.addr
+      if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end
+      ctx.imm = imm
+      if sz == "W" then
+	x = format("word 0x%04x", imm%65536)
+      elseif ctx.x64 then
+	local lo = imm % 0x1000000
+	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
+      else
+	x = format("0x%08x", imm)
+      end
+    elseif p == "R" then
+      local r = byte(code, pos-1, pos-1)%8
+      if ctx.rexb then r = r + 8; ctx.rexb = false end
+      x = regs[r+1]
+    elseif p == "a" then x = regs[1]
+    elseif p == "c" then x = "cl"
+    elseif p == "d" then x = "dx"
+    elseif p == "1" then x = "1"
+    else
+      if not mode then
+	mode = ctx.mrm
+	if not mode then
+	  if pos > stop then return incomplete(ctx) end
+	  mode = byte(code, pos, pos)
+	  pos = pos+1
+	end
+	rm = mode%8; mode = (mode-rm)/8
+	sp = mode%8; mode = (mode-sp)/8
+	sdisp = ""
+	if mode < 3 then
+	  if rm == 4 then
+	    if pos > stop then return incomplete(ctx) end
+	    sc = byte(code, pos, pos)
+	    pos = pos+1
+	    rm = sc%8; sc = (sc-rm)/8
+	    rx = sc%8; sc = (sc-rx)/8
+	    if ctx.rexx then rx = rx + 8; ctx.rexx = false end
+	    if rx == 4 then rx = nil end
+	  end
+	  if mode > 0 or rm == 5 then
+	    local dsz = mode
+	    if dsz ~= 1 then dsz = 4 end
+	    local disp = getimm(ctx, pos, dsz); if not disp then return end
+	    if mode == 0 then rm = nil end
+	    if rm or rx or (not sc and ctx.x64 and not ctx.a32) then
+	      if dsz == 1 and disp > 127 then
+		sdisp = format("-0x%x", 256-disp)
+	      elseif disp >= 0 and disp <= 0x7fffffff then
+		sdisp = format("+0x%x", disp)
+	      else
+		sdisp = format("-0x%x", (0xffffffff+1)-disp)
+	      end
+	    else
+	      sdisp = format(ctx.x64 and not ctx.a32 and
+		not (disp >= 0 and disp <= 0x7fffffff)
+		and "0xffffffff%08x" or "0x%08x", disp)
+	    end
+	    pos = pos+dsz
+	  end
+	end
+	if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end
+	if ctx.rexr then sp = sp + 8; ctx.rexr = false end
+      end
+      if p == "m" then
+	if mode == 3 then x = regs[rm+1]
+	else
+	  local aregs = ctx.a32 and map_regs.D or ctx.aregs
+	  local srm, srx = "", ""
+	  if rm then srm = aregs[rm+1]
+	  elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end
+	  ctx.a32 = false
+	  if rx then
+	    if rm then srm = srm.."+" end
+	    srx = aregs[rx+1]
+	    if sc > 0 then srx = srx.."*"..(2^sc) end
+	  end
+	  x = format("[%s%s%s]", srm, srx, sdisp)
+	end
+	if mode < 3 and
+	   (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck.
+	  x = map_sz2prefix[sz].." "..x
+	end
+      elseif p == "r" then x = regs[sp+1]
+      elseif p == "g" then x = map_segregs[sp+1]
+      elseif p == "p" then -- Suppress prefix.
+      elseif p == "f" then x = "st"..rm
+      elseif p == "x" then
+	if sp == 0 and ctx.lock and not ctx.x64 then
+	  x = "CR8"; ctx.lock = false
+	else
+	  x = "CR"..sp
+	end
+      elseif p == "y" then x = "DR"..sp
+      elseif p == "z" then x = "TR"..sp
+      elseif p == "t" then
+      else
+	error("bad pattern `"..pat.."'")
+      end
+    end
+    if x then operands = operands and operands..", "..x or x end
+  end
+  ctx.pos = pos
+  return putop(ctx, name, operands)
+end
+
+-- Forward declaration.
+local map_act
+
+-- Fetch and cache MRM byte.
+local function getmrm(ctx)
+  local mrm = ctx.mrm
+  if not mrm then
+    local pos = ctx.pos
+    if pos > ctx.stop then return nil end
+    mrm = byte(ctx.code, pos, pos)
+    ctx.pos = pos+1
+    ctx.mrm = mrm
+  end
+  return mrm
+end
+
+-- Dispatch to handler depending on pattern.
+local function dispatch(ctx, opat, patgrp)
+  if not opat then return unknown(ctx) end
+  if match(opat, "%|") then -- MMX/SSE variants depending on prefix.
+    local p
+    if ctx.rep then
+      p = ctx.rep=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)"
+      ctx.rep = false
+    elseif ctx.o16 then p = "%|[^%|]*%|([^%|]*)"; ctx.o16 = false
+    else p = "^[^%|]*" end
+    opat = match(opat, p)
+    if not opat then return unknown(ctx) end
+--    ctx.rep = false; ctx.o16 = false
+    --XXX fails for 66 f2 0f 38 f1 06  crc32 eax,WORD PTR [esi]
+    --XXX remove in branches?
+  end
+  if match(opat, "%$") then -- reg$mem variants.
+    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
+    opat = match(opat, mrm >= 192 and "^[^%$]*" or "%$(.*)")
+    if opat == "" then return unknown(ctx) end
+  end
+  if opat == "" then return unknown(ctx) end
+  local name, pat = match(opat, "^([a-z0-9 ]*)(.*)")
+  if pat == "" and patgrp then pat = patgrp end
+  return map_act[sub(pat, 1, 1)](ctx, name, pat)
+end
+
+-- Get a pattern from an opcode map and dispatch to handler.
+local function dispatchmap(ctx, opcmap)
+  local pos = ctx.pos
+  local opat = opcmap[byte(ctx.code, pos, pos)]
+  pos = pos + 1
+  ctx.pos = pos
+  return dispatch(ctx, opat)
+end
+
+-- Map for action codes. The key is the first char after the name.
+map_act = {
+  -- Simple opcodes without operands.
+  [""] = function(ctx, name, pat)
+    return putop(ctx, name)
+  end,
+
+  -- Operand size chars fall right through.
+  B = putpat, W = putpat, D = putpat, Q = putpat,
+  V = putpat, U = putpat, T = putpat,
+  M = putpat, X = putpat, P = putpat,
+  F = putpat, G = putpat,
+
+  -- Collect prefixes.
+  [":"] = function(ctx, name, pat)
+    ctx[pat == ":" and name or sub(pat, 2)] = name
+    if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes.
+  end,
+
+  -- Chain to special handler specified by name.
+  ["*"] = function(ctx, name, pat)
+    return map_act[name](ctx, name, sub(pat, 2))
+  end,
+
+  -- Use named subtable for opcode group.
+  ["!"] = function(ctx, name, pat)
+    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
+    return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2))
+  end,
+
+  -- o16,o32[,o64] variants.
+  sz = function(ctx, name, pat)
+    if ctx.o16 then ctx.o16 = false
+    else
+      pat = match(pat, ",(.*)")
+      if ctx.rexw then
+	local p = match(pat, ",(.*)")
+	if p then pat = p; ctx.rexw = false end
+      end
+    end
+    pat = match(pat, "^[^,]*")
+    return dispatch(ctx, pat)
+  end,
+
+  -- Two-byte opcode dispatch.
+  opc2 = function(ctx, name, pat)
+    return dispatchmap(ctx, map_opc2)
+  end,
+
+  -- Three-byte opcode dispatch.
+  opc3 = function(ctx, name, pat)
+    return dispatchmap(ctx, map_opc3[pat])
+  end,
+
+  -- VMX/SVM dispatch.
+  vm = function(ctx, name, pat)
+    return dispatch(ctx, map_opcvm[ctx.mrm])
+  end,
+
+  -- Floating point opcode dispatch.
+  fp = function(ctx, name, pat)
+    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
+    local rm = mrm%8
+    local idx = pat*8 + ((mrm-rm)/8)%8
+    if mrm >= 192 then idx = idx + 64 end
+    local opat = map_opcfp[idx]
+    if type(opat) == "table" then opat = opat[rm+1] end
+    return dispatch(ctx, opat)
+  end,
+
+  -- REX prefix.
+  rex = function(ctx, name, pat)
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+    for p in gmatch(pat, ".") do ctx["rex"..p] = true end
+    ctx.rex = true
+  end,
+
+  -- Special case for nop with REX prefix.
+  nop = function(ctx, name, pat)
+    return dispatch(ctx, ctx.rex and pat or "nop")
+  end,
+}
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+local function disass_block(ctx, ofs, len)
+  if not ofs then ofs = 0 end
+  local stop = len and ofs+len or #ctx.code
+  ofs = ofs + 1
+  ctx.start = ofs
+  ctx.pos = ofs
+  ctx.stop = stop
+  ctx.imm = nil
+  ctx.mrm = false
+  clearprefixes(ctx)
+  while ctx.pos <= stop do dispatchmap(ctx, ctx.map1) end
+  if ctx.pos ~= ctx.start then incomplete(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create_(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  ctx.addr = (addr or 0) - 1
+  ctx.out = out or io.write
+  ctx.symtab = {}
+  ctx.disass = disass_block
+  ctx.hexdump = 16
+  ctx.x64 = false
+  ctx.map1 = map_opc1_32
+  ctx.aregs = map_regs.D
+  return ctx
+end
+
+local function create64_(code, addr, out)
+  local ctx = create_(code, addr, out)
+  ctx.x64 = true
+  ctx.map1 = map_opc1_64
+  ctx.aregs = map_regs.Q
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass_(code, addr, out)
+  create_(code, addr, out):disass()
+end
+
+local function disass64_(code, addr, out)
+  create64_(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname_(r)
+  if r < 8 then return map_regs.D[r+1] end
+  return map_regs.X[r-7]
+end
+
+local function regname64_(r)
+  if r < 16 then return map_regs.Q[r+1] end
+  return map_regs.X[r-15]
+end
+
+-- Public module functions.
+module(...)
+
+create = create_
+create64 = create64_
+disass = disass_
+disass64 = disass64_
+regname = regname_
+regname64 = regname64_
+

+ 699 - 0
luajit.mod/luajit/src/jit/dump.lua

@@ -0,0 +1,699 @@
+----------------------------------------------------------------------------
+-- LuaJIT compiler dump module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module can be used to debug the JIT compiler itself. It dumps the
+-- code representations and structures used in various compiler stages.
+--
+-- Example usage:
+--
+--   luajit -jdump -e "local x=0; for i=1,1e6 do x=x+i end; print(x)"
+--   luajit -jdump=im -e "for i=1,1000 do for j=1,1000 do end end" | less -R
+--   luajit -jdump=is myapp.lua | less -R
+--   luajit -jdump=-b myapp.lua
+--   luajit -jdump=+aH,myapp.html myapp.lua
+--   luajit -jdump=ixT,myapp.dump myapp.lua
+--
+-- The first argument specifies the dump mode. The second argument gives
+-- the output file name. Default output is to stdout, unless the environment
+-- variable LUAJIT_DUMPFILE is set. The file is overwritten every time the
+-- module is started.
+--
+-- Different features can be turned on or off with the dump mode. If the
+-- mode starts with a '+', the following features are added to the default
+-- set of features; a '-' removes them. Otherwise the features are replaced.
+--
+-- The following dump features are available (* marks the default):
+--
+--  * t  Print a line for each started, ended or aborted trace (see also -jv).
+--  * b  Dump the traced bytecode.
+--  * i  Dump the IR (intermediate representation).
+--    r  Augment the IR with register/stack slots.
+--    s  Dump the snapshot map.
+--  * m  Dump the generated machine code.
+--    x  Print each taken trace exit.
+--    X  Print each taken trace exit and the contents of all registers.
+--    a  Print the IR of aborted traces, too.
+--
+-- The output format can be set with the following characters:
+--
+--    T  Plain text output.
+--    A  ANSI-colored text output
+--    H  Colorized HTML + CSS output.
+--
+-- The default output format is plain text. It's set to ANSI-colored text
+-- if the COLORTERM variable is set. Note: this is independent of any output
+-- redirection, which is actually considered a feature.
+--
+-- You probably want to use less -R to enjoy viewing ANSI-colored text from
+-- a pipe or a file. Add this to your ~/.bashrc: export LESS="-R"
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
+local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
+local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
+local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
+local bit = require("bit")
+local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local sub, gsub, format = string.sub, string.gsub, string.format
+local byte, char, rep = string.byte, string.char, string.rep
+local type, tostring = type, tostring
+local stdout, stderr = io.stdout, io.stderr
+
+-- Load other modules on-demand.
+local bcline, disass
+
+-- Active flag, output file handle and dump mode.
+local active, out, dumpmode
+
+------------------------------------------------------------------------------
+
+local symtabmt = { __index = false }
+local symtab = {}
+local nexitsym = 0
+
+-- Fill nested symbol table with per-trace exit stub addresses.
+local function fillsymtab_tr(tr, nexit)
+  local t = {}
+  symtabmt.__index = t
+  if jit.arch == "mips" or jit.arch == "mipsel" then
+    t[traceexitstub(tr, 0)] = "exit"
+    return
+  end
+  for i=0,nexit-1 do
+    local addr = traceexitstub(tr, i)
+    t[addr] = tostring(i)
+  end
+  local addr = traceexitstub(tr, nexit)
+  if addr then t[addr] = "stack_check" end
+end
+
+-- Fill symbol table with trace exit stub addresses.
+local function fillsymtab(tr, nexit)
+  local t = symtab
+  if nexitsym == 0 then
+    local ircall = vmdef.ircall
+    for i=0,#ircall do
+      local addr = ircalladdr(i)
+      if addr ~= 0 then t[addr] = ircall[i] end
+    end
+  end
+  if nexitsym == 1000000 then -- Per-trace exit stubs.
+    fillsymtab_tr(tr, nexit)
+  elseif nexit > nexitsym then -- Shared exit stubs.
+    for i=nexitsym,nexit-1 do
+      local addr = traceexitstub(i)
+      if addr == nil then -- Fall back to per-trace exit stubs.
+	fillsymtab_tr(tr, nexit)
+	setmetatable(symtab, symtabmt)
+	nexit = 1000000
+	break
+      end
+      t[addr] = tostring(i)
+    end
+    nexitsym = nexit
+  end
+  return t
+end
+
+local function dumpwrite(s)
+  out:write(s)
+end
+
+-- Disassemble machine code.
+local function dump_mcode(tr)
+  local info = traceinfo(tr)
+  if not info then return end
+  local mcode, addr, loop = tracemc(tr)
+  if not mcode then return end
+  if not disass then disass = require("jit.dis_"..jit.arch) end
+  out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
+  local ctx = disass.create(mcode, addr, dumpwrite)
+  ctx.hexdump = 0
+  ctx.symtab = fillsymtab(tr, info.nexit)
+  if loop ~= 0 then
+    symtab[addr+loop] = "LOOP"
+    ctx:disass(0, loop)
+    out:write("->LOOP:\n")
+    ctx:disass(loop, #mcode-loop)
+    symtab[addr+loop] = nil
+  else
+    ctx:disass(0, #mcode)
+  end
+end
+
+------------------------------------------------------------------------------
+
+local irtype_text = {
+  [0] = "nil",
+  "fal",
+  "tru",
+  "lud",
+  "str",
+  "p32",
+  "thr",
+  "pro",
+  "fun",
+  "p64",
+  "cdt",
+  "tab",
+  "udt",
+  "flt",
+  "num",
+  "i8 ",
+  "u8 ",
+  "i16",
+  "u16",
+  "int",
+  "u32",
+  "i64",
+  "u64",
+  "sfp",
+}
+
+local colortype_ansi = {
+  [0] = "%s",
+  "%s",
+  "%s",
+  "\027[36m%s\027[m",
+  "\027[32m%s\027[m",
+  "%s",
+  "\027[1m%s\027[m",
+  "%s",
+  "\027[1m%s\027[m",
+  "%s",
+  "\027[33m%s\027[m",
+  "\027[31m%s\027[m",
+  "\027[36m%s\027[m",
+  "\027[34m%s\027[m",
+  "\027[34m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+}
+
+local function colorize_text(s, t)
+  return s
+end
+
+local function colorize_ansi(s, t)
+  return format(colortype_ansi[t], s)
+end
+
+local irtype_ansi = setmetatable({},
+  { __index = function(tab, t)
+      local s = colorize_ansi(irtype_text[t], t); tab[t] = s; return s; end })
+
+local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
+
+local function colorize_html(s, t)
+  s = gsub(s, "[<>&]", html_escape)
+  return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
+end
+
+local irtype_html = setmetatable({},
+  { __index = function(tab, t)
+      local s = colorize_html(irtype_text[t], t); tab[t] = s; return s; end })
+
+local header_html = [[
+<style type="text/css">
+background { background: #ffffff; color: #000000; }
+pre.ljdump {
+font-size: 10pt;
+background: #f0f4ff;
+color: #000000;
+border: 1px solid #bfcfff;
+padding: 0.5em;
+margin-left: 2em;
+margin-right: 2em;
+}
+span.irt_str { color: #00a000; }
+span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
+span.irt_tab { color: #c00000; }
+span.irt_udt, span.irt_lud { color: #00c0c0; }
+span.irt_num { color: #4040c0; }
+span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
+</style>
+]]
+
+local colorize, irtype
+
+-- Lookup tables to convert some literals into names.
+local litname = {
+  ["SLOAD "] = setmetatable({}, { __index = function(t, mode)
+    local s = ""
+    if band(mode, 1) ~= 0 then s = s.."P" end
+    if band(mode, 2) ~= 0 then s = s.."F" end
+    if band(mode, 4) ~= 0 then s = s.."T" end
+    if band(mode, 8) ~= 0 then s = s.."C" end
+    if band(mode, 16) ~= 0 then s = s.."R" end
+    if band(mode, 32) ~= 0 then s = s.."I" end
+    t[mode] = s
+    return s
+  end}),
+  ["XLOAD "] = { [0] = "", "R", "V", "RV", "U", "RU", "VU", "RVU", },
+  ["CONV  "] = setmetatable({}, { __index = function(t, mode)
+    local s = irtype[band(mode, 31)]
+    s = irtype[band(shr(mode, 5), 31)].."."..s
+    if band(mode, 0x400) ~= 0 then s = s.." trunc"
+    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    local c = shr(mode, 14)
+    if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
+    t[mode] = s
+    return s
+  end}),
+  ["FLOAD "] = vmdef.irfield,
+  ["FREF  "] = vmdef.irfield,
+  ["FPMATH"] = vmdef.irfpm,
+}
+
+local function ctlsub(c)
+  if c == "\n" then return "\\n"
+  elseif c == "\r" then return "\\r"
+  elseif c == "\t" then return "\\t"
+  else return format("\\%03d", byte(c))
+  end
+end
+
+local function fmtfunc(func, pc)
+  local fi = funcinfo(func, pc)
+  if fi.loc then
+    return fi.loc
+  elseif fi.ffid then
+    return vmdef.ffnames[fi.ffid]
+  elseif fi.addr then
+    return format("C:%x", fi.addr)
+  else
+    return "(?)"
+  end
+end
+
+local function formatk(tr, idx)
+  local k, t, slot = tracek(tr, idx)
+  local tn = type(k)
+  local s
+  if tn == "number" then
+    if k == 2^52+2^51 then
+      s = "bias"
+    else
+      s = format("%+.14g", k)
+    end
+  elseif tn == "string" then
+    s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
+  elseif tn == "function" then
+    s = fmtfunc(k)
+  elseif tn == "table" then
+    s = format("{%p}", k)
+  elseif tn == "userdata" then
+    if t == 12 then
+      s = format("userdata:%p", k)
+    else
+      s = format("[%p]", k)
+      if s == "[0x00000000]" then s = "NULL" end
+    end
+  elseif t == 21 then -- int64_t
+    s = sub(tostring(k), 1, -3)
+    if sub(s, 1, 1) ~= "-" then s = "+"..s end
+  else
+    s = tostring(k) -- For primitives.
+  end
+  s = colorize(format("%-4s", s), t)
+  if slot then
+    s = format("%s @%d", s, slot)
+  end
+  return s
+end
+
+local function printsnap(tr, snap)
+  local n = 2
+  for s=0,snap[1]-1 do
+    local sn = snap[n]
+    if shr(sn, 24) == s then
+      n = n + 1
+      local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
+      if ref < 0 then
+	out:write(formatk(tr, ref))
+      elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
+	out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
+      else
+	local m, ot, op1, op2 = traceir(tr, ref)
+	out:write(colorize(format("%04d", ref), band(ot, 31)))
+      end
+      out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
+    else
+      out:write("---- ")
+    end
+  end
+  out:write("]\n")
+end
+
+-- Dump snapshots (not interleaved with IR).
+local function dump_snap(tr)
+  out:write("---- TRACE ", tr, " snapshots\n")
+  for i=0,1000000000 do
+    local snap = tracesnap(tr, i)
+    if not snap then break end
+    out:write(format("#%-3d %04d [ ", i, snap[0]))
+    printsnap(tr, snap)
+  end
+end
+
+-- Return a register name or stack slot for a rid/sp location.
+local function ridsp_name(ridsp, ins)
+  if not disass then disass = require("jit.dis_"..jit.arch) end
+  local rid, slot = band(ridsp, 0xff), shr(ridsp, 8)
+  if rid == 253 or rid == 254 then
+    return (slot == 0 or slot == 255) and " {sink" or format(" {%04d", ins-slot)
+  end
+  if ridsp > 255 then return format("[%x]", slot*4) end
+  if rid < 128 then return disass.regname(rid) end
+  return ""
+end
+
+-- Dump CALL* function ref and return optional ctype.
+local function dumpcallfunc(tr, ins)
+  local ctype
+  if ins > 0 then
+    local m, ot, op1, op2 = traceir(tr, ins)
+    if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
+      ins = op1
+      ctype = formatk(tr, op2)
+    end
+  end
+  if ins < 0 then
+    out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
+  else
+    out:write(format("%04d (", ins))
+  end
+  return ctype
+end
+
+-- Recursively gather CALL* args and dump them.
+local function dumpcallargs(tr, ins)
+  if ins < 0 then
+    out:write(formatk(tr, ins))
+  else
+    local m, ot, op1, op2 = traceir(tr, ins)
+    local oidx = 6*shr(ot, 8)
+    local op = sub(vmdef.irnames, oidx+1, oidx+6)
+    if op == "CARG  " then
+      dumpcallargs(tr, op1)
+      if op2 < 0 then
+	out:write(" ", formatk(tr, op2))
+      else
+	out:write(" ", format("%04d", op2))
+      end
+    else
+      out:write(format("%04d", ins))
+    end
+  end
+end
+
+-- Dump IR and interleaved snapshots.
+local function dump_ir(tr, dumpsnap, dumpreg)
+  local info = traceinfo(tr)
+  if not info then return end
+  local nins = info.nins
+  out:write("---- TRACE ", tr, " IR\n")
+  local irnames = vmdef.irnames
+  local snapref = 65536
+  local snap, snapno
+  if dumpsnap then
+    snap = tracesnap(tr, 0)
+    snapref = snap[0]
+    snapno = 0
+  end
+  for ins=1,nins do
+    if ins >= snapref then
+      if dumpreg then
+	out:write(format("....              SNAP   #%-3d [ ", snapno))
+      else
+	out:write(format("....        SNAP   #%-3d [ ", snapno))
+      end
+      printsnap(tr, snap)
+      snapno = snapno + 1
+      snap = tracesnap(tr, snapno)
+      snapref = snap and snap[0] or 65536
+    end
+    local m, ot, op1, op2, ridsp = traceir(tr, ins)
+    local oidx, t = 6*shr(ot, 8), band(ot, 31)
+    local op = sub(irnames, oidx+1, oidx+6)
+    if op == "LOOP  " then
+      if dumpreg then
+	out:write(format("%04d ------------ LOOP ------------\n", ins))
+      else
+	out:write(format("%04d ------ LOOP ------------\n", ins))
+      end
+    elseif op ~= "NOP   " and op ~= "CARG  " and
+	   (dumpreg or op ~= "RENAME") then
+      local rid = band(ridsp, 255)
+      if dumpreg then
+	out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
+      else
+	out:write(format("%04d ", ins))
+      end
+      out:write(format("%s%s %s %s ",
+		       (rid == 254 or rid == 253) and "}" or
+		       (band(ot, 128) == 0 and " " or ">"),
+		       band(ot, 64) == 0 and " " or "+",
+		       irtype[t], op))
+      local m1, m2 = band(m, 3), band(m, 3*4)
+      if sub(op, 1, 4) == "CALL" then
+	local ctype
+	if m2 == 1*4 then -- op2 == IRMlit
+	  out:write(format("%-10s  (", vmdef.ircall[op2]))
+	else
+	  ctype = dumpcallfunc(tr, op2)
+	end
+	if op1 ~= -1 then dumpcallargs(tr, op1) end
+	out:write(")")
+	if ctype then out:write(" ctype ", ctype) end
+      elseif op == "CNEW  " and op2 == -1 then
+	out:write(formatk(tr, op1))
+      elseif m1 ~= 3 then -- op1 != IRMnone
+	if op1 < 0 then
+	  out:write(formatk(tr, op1))
+	else
+	  out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
+	end
+	if m2 ~= 3*4 then -- op2 != IRMnone
+	  if m2 == 1*4 then -- op2 == IRMlit
+	    local litn = litname[op]
+	    if litn and litn[op2] then
+	      out:write("  ", litn[op2])
+	    elseif op == "UREFO " or op == "UREFC " then
+	      out:write(format("  #%-3d", shr(op2, 8)))
+	    else
+	      out:write(format("  #%-3d", op2))
+	    end
+	  elseif op2 < 0 then
+	    out:write("  ", formatk(tr, op2))
+	  else
+	    out:write(format("  %04d", op2))
+	  end
+	end
+      end
+      out:write("\n")
+    end
+  end
+  if snap then
+    if dumpreg then
+      out:write(format("....              SNAP   #%-3d [ ", snapno))
+    else
+      out:write(format("....        SNAP   #%-3d [ ", snapno))
+    end
+    printsnap(tr, snap)
+  end
+end
+
+------------------------------------------------------------------------------
+
+local recprefix = ""
+local recdepth = 0
+
+-- Format trace error message.
+local function fmterr(err, info)
+  if type(err) == "number" then
+    if type(info) == "function" then info = fmtfunc(info) end
+    err = format(vmdef.traceerr[err], info)
+  end
+  return err
+end
+
+-- Dump trace states.
+local function dump_trace(what, tr, func, pc, otr, oex)
+  if what == "stop" or (what == "abort" and dumpmode.a) then
+    if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
+    elseif dumpmode.s then dump_snap(tr) end
+    if dumpmode.m then dump_mcode(tr) end
+  end
+  if what == "start" then
+    if dumpmode.H then out:write('<pre class="ljdump">\n') end
+    out:write("---- TRACE ", tr, " ", what)
+    if otr then out:write(" ", otr, "/", oex) end
+    out:write(" ", fmtfunc(func, pc), "\n")
+  elseif what == "stop" or what == "abort" then
+    out:write("---- TRACE ", tr, " ", what)
+    if what == "abort" then
+      out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
+    else
+      local info = traceinfo(tr)
+      local link, ltype = info.link, info.linktype
+      if link == tr or link == 0 then
+	out:write(" -> ", ltype, "\n")
+      elseif ltype == "root" then
+	out:write(" -> ", link, "\n")
+      else
+	out:write(" -> ", link, " ", ltype, "\n")
+      end
+    end
+    if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
+  else
+    out:write("---- TRACE ", what, "\n\n")
+  end
+  out:flush()
+end
+
+-- Dump recorded bytecode.
+local function dump_record(tr, func, pc, depth, callee)
+  if depth ~= recdepth then
+    recdepth = depth
+    recprefix = rep(" .", depth)
+  end
+  local line
+  if pc >= 0 then
+    line = bcline(func, pc, recprefix)
+    if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
+  else
+    line = "0000 "..recprefix.." FUNCC      \n"
+    callee = func
+  end
+  if pc <= 0 then
+    out:write(sub(line, 1, -2), "         ; ", fmtfunc(func), "\n")
+  else
+    out:write(line)
+  end
+  if pc >= 0 and band(funcbc(func, pc), 0xff) < 16 then -- ORDER BC
+    out:write(bcline(func, pc+1, recprefix)) -- Write JMP for cond.
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Dump taken trace exits.
+local function dump_texit(tr, ex, ngpr, nfpr, ...)
+  out:write("---- TRACE ", tr, " exit ", ex, "\n")
+  if dumpmode.X then
+    local regs = {...}
+    if jit.arch == "x64" then
+      for i=1,ngpr do
+	out:write(format(" %016x", regs[i]))
+	if i % 4 == 0 then out:write("\n") end
+      end
+    else
+      for i=1,ngpr do
+	out:write(format(" %08x", regs[i]))
+	if i % 8 == 0 then out:write("\n") end
+      end
+    end
+    if jit.arch == "mips" or jit.arch == "mipsel" then
+      for i=1,nfpr,2 do
+	out:write(format(" %+17.14g", regs[ngpr+i]))
+	if i % 8 == 7 then out:write("\n") end
+      end
+    else
+      for i=1,nfpr do
+	out:write(format(" %+17.14g", regs[ngpr+i]))
+	if i % 4 == 0 then out:write("\n") end
+      end
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Detach dump handlers.
+local function dumpoff()
+  if active then
+    active = false
+    jit.attach(dump_texit)
+    jit.attach(dump_record)
+    jit.attach(dump_trace)
+    if out and out ~= stdout and out ~= stderr then out:close() end
+    out = nil
+  end
+end
+
+-- Open the output file and attach dump handlers.
+local function dumpon(opt, outfile)
+  if active then dumpoff() end
+
+  local colormode = os.getenv("COLORTERM") and "A" or "T"
+  if opt then
+    opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end)
+  end
+
+  local m = { t=true, b=true, i=true, m=true, }
+  if opt and opt ~= "" then
+    local o = sub(opt, 1, 1)
+    if o ~= "+" and o ~= "-" then m = {} end
+    for i=1,#opt do m[sub(opt, i, i)] = (o ~= "-") end
+  end
+  dumpmode = m
+
+  if m.t or m.b or m.i or m.s or m.m then
+    jit.attach(dump_trace, "trace")
+  end
+  if m.b then
+    jit.attach(dump_record, "record")
+    if not bcline then bcline = require("jit.bc").line end
+  end
+  if m.x or m.X then
+    jit.attach(dump_texit, "texit")
+  end
+
+  if not outfile then outfile = os.getenv("LUAJIT_DUMPFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stdout
+  end
+
+  m[colormode] = true
+  if colormode == "A" then
+    colorize = colorize_ansi
+    irtype = irtype_ansi
+  elseif colormode == "H" then
+    colorize = colorize_html
+    irtype = irtype_html
+    out:write(header_html)
+  else
+    colorize = colorize_text
+    irtype = irtype_text
+  end
+
+  active = true
+end
+
+-- Public module functions.
+module(...)
+
+on = dumpon
+off = dumpoff
+start = dumpon -- For -j command line option.
+

+ 167 - 0
luajit.mod/luajit/src/jit/v.lua

@@ -0,0 +1,167 @@
+----------------------------------------------------------------------------
+-- Verbose mode of the LuaJIT compiler.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module shows verbose information about the progress of the
+-- JIT compiler. It prints one line for each generated trace. This module
+-- is useful to see which code has been compiled or where the compiler
+-- punts and falls back to the interpreter.
+--
+-- Example usage:
+--
+--   luajit -jv -e "for i=1,1000 do for j=1,1000 do end end"
+--   luajit -jv=myapp.out myapp.lua
+--
+-- Default output is to stderr. To redirect the output to a file, pass a
+-- filename as an argument (use '-' for stdout) or set the environment
+-- variable LUAJIT_VERBOSEFILE. The file is overwritten every time the
+-- module is started.
+--
+-- The output from the first example should look like this:
+--
+-- [TRACE   1 (command line):1 loop]
+-- [TRACE   2 (1/3) (command line):1 -> 1]
+--
+-- The first number in each line is the internal trace number. Next are
+-- the file name ('(command line)') and the line number (':1') where the
+-- trace has started. Side traces also show the parent trace number and
+-- the exit number where they are attached to in parentheses ('(1/3)').
+-- An arrow at the end shows where the trace links to ('-> 1'), unless
+-- it loops to itself.
+--
+-- In this case the inner loop gets hot and is traced first, generating
+-- a root trace. Then the last exit from the 1st trace gets hot, too,
+-- and triggers generation of the 2nd trace. The side trace follows the
+-- path along the outer loop and *around* the inner loop, back to its
+-- start, and then links to the 1st trace. Yes, this may seem unusual,
+-- if you know how traditional compilers work. Trace compilers are full
+-- of surprises like this -- have fun! :-)
+--
+-- Aborted traces are shown like this:
+--
+-- [TRACE --- foo.lua:44 -- leaving loop in root trace at foo:lua:50]
+--
+-- Don't worry -- trace aborts are quite common, even in programs which
+-- can be fully compiled. The compiler may retry several times until it
+-- finds a suitable trace.
+--
+-- Of course this doesn't work with features that are not-yet-implemented
+-- (NYI error messages). The VM simply falls back to the interpreter. This
+-- may not matter at all if the particular trace is not very high up in
+-- the CPU usage profile. Oh, and the interpreter is quite fast, too.
+--
+-- Also check out the -jdump module, which prints all the gory details.
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
+local type, format = type, string.format
+local stdout, stderr = io.stdout, io.stderr
+
+-- Active flag and output file handle.
+local active, out
+
+------------------------------------------------------------------------------
+
+local startloc, startex
+
+local function fmtfunc(func, pc)
+  local fi = funcinfo(func, pc)
+  if fi.loc then
+    return fi.loc
+  elseif fi.ffid then
+    return vmdef.ffnames[fi.ffid]
+  elseif fi.addr then
+    return format("C:%x", fi.addr)
+  else
+    return "(?)"
+  end
+end
+
+-- Format trace error message.
+local function fmterr(err, info)
+  if type(err) == "number" then
+    if type(info) == "function" then info = fmtfunc(info) end
+    err = format(vmdef.traceerr[err], info)
+  end
+  return err
+end
+
+-- Dump trace states.
+local function dump_trace(what, tr, func, pc, otr, oex)
+  if what == "start" then
+    startloc = fmtfunc(func, pc)
+    startex = otr and "("..otr.."/"..oex..") " or ""
+  else
+    if what == "abort" then
+      local loc = fmtfunc(func, pc)
+      if loc ~= startloc then
+	out:write(format("[TRACE --- %s%s -- %s at %s]\n",
+	  startex, startloc, fmterr(otr, oex), loc))
+      else
+	out:write(format("[TRACE --- %s%s -- %s]\n",
+	  startex, startloc, fmterr(otr, oex)))
+      end
+    elseif what == "stop" then
+      local info = traceinfo(tr)
+      local link, ltype = info.link, info.linktype
+      if ltype == "interpreter" then
+	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
+	  tr, startex, startloc))
+      elseif link == tr or link == 0 then
+	out:write(format("[TRACE %3s %s%s %s]\n",
+	  tr, startex, startloc, ltype))
+      elseif ltype == "root" then
+	out:write(format("[TRACE %3s %s%s -> %d]\n",
+	  tr, startex, startloc, link))
+      else
+	out:write(format("[TRACE %3s %s%s -> %d %s]\n",
+	  tr, startex, startloc, link, ltype))
+      end
+    else
+      out:write(format("[TRACE %s]\n", what))
+    end
+    out:flush()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Detach dump handlers.
+local function dumpoff()
+  if active then
+    active = false
+    jit.attach(dump_trace)
+    if out and out ~= stdout and out ~= stderr then out:close() end
+    out = nil
+  end
+end
+
+-- Open the output file and attach dump handlers.
+local function dumpon(outfile)
+  if active then dumpoff() end
+  if not outfile then outfile = os.getenv("LUAJIT_VERBOSEFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stderr
+  end
+  jit.attach(dump_trace, "trace")
+  active = true
+end
+
+-- Public module functions.
+module(...)
+
+on = dumpon
+off = dumpoff
+start = dumpon -- For -j command line option.
+

+ 167 - 0
luajit.mod/luajit/src/lauxlib.h

@@ -0,0 +1,167 @@
+/*
+** $Id: lauxlib.h,v 1.88.1.1 2007/12/27 13:02:25 roberto Exp $
+** Auxiliary functions for building Lua libraries
+** See Copyright Notice in lua.h
+*/
+
+
+#ifndef lauxlib_h
+#define lauxlib_h
+
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include "lua.h"
+
+
+#define luaL_getn(L,i)          ((int)lua_objlen(L, i))
+#define luaL_setn(L,i,j)        ((void)0)  /* no op! */
+
+/* extra error code for `luaL_load' */
+#define LUA_ERRFILE     (LUA_ERRERR+1)
+
+typedef struct luaL_Reg {
+  const char *name;
+  lua_CFunction func;
+} luaL_Reg;
+
+LUALIB_API void (luaL_openlib) (lua_State *L, const char *libname,
+                                const luaL_Reg *l, int nup);
+LUALIB_API void (luaL_register) (lua_State *L, const char *libname,
+                                const luaL_Reg *l);
+LUALIB_API int (luaL_getmetafield) (lua_State *L, int obj, const char *e);
+LUALIB_API int (luaL_callmeta) (lua_State *L, int obj, const char *e);
+LUALIB_API int (luaL_typerror) (lua_State *L, int narg, const char *tname);
+LUALIB_API int (luaL_argerror) (lua_State *L, int numarg, const char *extramsg);
+LUALIB_API const char *(luaL_checklstring) (lua_State *L, int numArg,
+                                                          size_t *l);
+LUALIB_API const char *(luaL_optlstring) (lua_State *L, int numArg,
+                                          const char *def, size_t *l);
+LUALIB_API lua_Number (luaL_checknumber) (lua_State *L, int numArg);
+LUALIB_API lua_Number (luaL_optnumber) (lua_State *L, int nArg, lua_Number def);
+
+LUALIB_API lua_Integer (luaL_checkinteger) (lua_State *L, int numArg);
+LUALIB_API lua_Integer (luaL_optinteger) (lua_State *L, int nArg,
+                                          lua_Integer def);
+
+LUALIB_API void (luaL_checkstack) (lua_State *L, int sz, const char *msg);
+LUALIB_API void (luaL_checktype) (lua_State *L, int narg, int t);
+LUALIB_API void (luaL_checkany) (lua_State *L, int narg);
+
+LUALIB_API int   (luaL_newmetatable) (lua_State *L, const char *tname);
+LUALIB_API void *(luaL_checkudata) (lua_State *L, int ud, const char *tname);
+
+LUALIB_API void (luaL_where) (lua_State *L, int lvl);
+LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
+
+LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
+                                   const char *const lst[]);
+
+LUALIB_API int (luaL_ref) (lua_State *L, int t);
+LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
+
+LUALIB_API int (luaL_loadfile) (lua_State *L, const char *filename);
+LUALIB_API int (luaL_loadbuffer) (lua_State *L, const char *buff, size_t sz,
+                                  const char *name);
+LUALIB_API int (luaL_loadstring) (lua_State *L, const char *s);
+
+LUALIB_API lua_State *(luaL_newstate) (void);
+
+
+LUALIB_API const char *(luaL_gsub) (lua_State *L, const char *s, const char *p,
+                                                  const char *r);
+
+LUALIB_API const char *(luaL_findtable) (lua_State *L, int idx,
+                                         const char *fname, int szhint);
+
+/* From Lua 5.2. */
+LUALIB_API int luaL_fileresult(lua_State *L, int stat, const char *fname);
+LUALIB_API int luaL_execresult(lua_State *L, int stat);
+LUALIB_API int (luaL_loadfilex) (lua_State *L, const char *filename,
+				 const char *mode);
+LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
+				   const char *name, const char *mode);
+LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
+				int level);
+
+
+/*
+** ===============================================================
+** some useful macros
+** ===============================================================
+*/
+
+#define luaL_argcheck(L, cond,numarg,extramsg)	\
+		((void)((cond) || luaL_argerror(L, (numarg), (extramsg))))
+#define luaL_checkstring(L,n)	(luaL_checklstring(L, (n), NULL))
+#define luaL_optstring(L,n,d)	(luaL_optlstring(L, (n), (d), NULL))
+#define luaL_checkint(L,n)	((int)luaL_checkinteger(L, (n)))
+#define luaL_optint(L,n,d)	((int)luaL_optinteger(L, (n), (d)))
+#define luaL_checklong(L,n)	((long)luaL_checkinteger(L, (n)))
+#define luaL_optlong(L,n,d)	((long)luaL_optinteger(L, (n), (d)))
+
+#define luaL_typename(L,i)	lua_typename(L, lua_type(L,(i)))
+
+#define luaL_dofile(L, fn) \
+	(luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0))
+
+#define luaL_dostring(L, s) \
+	(luaL_loadstring(L, s) || lua_pcall(L, 0, LUA_MULTRET, 0))
+
+#define luaL_getmetatable(L,n)	(lua_getfield(L, LUA_REGISTRYINDEX, (n)))
+
+#define luaL_opt(L,f,n,d)	(lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
+
+/*
+** {======================================================
+** Generic Buffer manipulation
+** =======================================================
+*/
+
+
+
+typedef struct luaL_Buffer {
+  char *p;			/* current position in buffer */
+  int lvl;  /* number of strings in the stack (level) */
+  lua_State *L;
+  char buffer[LUAL_BUFFERSIZE];
+} luaL_Buffer;
+
+#define luaL_addchar(B,c) \
+  ((void)((B)->p < ((B)->buffer+LUAL_BUFFERSIZE) || luaL_prepbuffer(B)), \
+   (*(B)->p++ = (char)(c)))
+
+/* compatibility only */
+#define luaL_putchar(B,c)	luaL_addchar(B,c)
+
+#define luaL_addsize(B,n)	((B)->p += (n))
+
+LUALIB_API void (luaL_buffinit) (lua_State *L, luaL_Buffer *B);
+LUALIB_API char *(luaL_prepbuffer) (luaL_Buffer *B);
+LUALIB_API void (luaL_addlstring) (luaL_Buffer *B, const char *s, size_t l);
+LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s);
+LUALIB_API void (luaL_addvalue) (luaL_Buffer *B);
+LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
+
+
+/* }====================================================== */
+
+
+/* compatibility with ref system */
+
+/* pre-defined references */
+#define LUA_NOREF       (-2)
+#define LUA_REFNIL      (-1)
+
+#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
+      (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
+
+#define lua_unref(L,ref)        luaL_unref(L, LUA_REGISTRYINDEX, (ref))
+
+#define lua_getref(L,ref)       lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
+
+
+#define luaL_reg	luaL_Reg
+
+#endif

+ 356 - 0
luajit.mod/luajit/src/lib_aux.c

@@ -0,0 +1,356 @@
+/*
+** Auxiliary library for the Lua/C API.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major parts taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#define lib_aux_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_state.h"
+#include "lj_trace.h"
+#include "lj_lib.h"
+
+#if LJ_TARGET_POSIX
+#include <sys/wait.h>
+#endif
+
+/* -- I/O error handling -------------------------------------------------- */
+
+LUALIB_API int luaL_fileresult(lua_State *L, int stat, const char *fname)
+{
+  if (stat) {
+    setboolV(L->top++, 1);
+    return 1;
+  } else {
+    int en = errno;  /* Lua API calls may change this value. */
+    setnilV(L->top++);
+    if (fname)
+      lua_pushfstring(L, "%s: %s", fname, strerror(en));
+    else
+      lua_pushfstring(L, "%s", strerror(en));
+    setintV(L->top++, en);
+    lj_trace_abort(G(L));
+    return 3;
+  }
+}
+
+LUALIB_API int luaL_execresult(lua_State *L, int stat)
+{
+  if (stat != -1) {
+#if LJ_TARGET_POSIX
+    if (WIFSIGNALED(stat)) {
+      stat = WTERMSIG(stat);
+      setnilV(L->top++);
+      lua_pushliteral(L, "signal");
+    } else {
+      if (WIFEXITED(stat))
+	stat = WEXITSTATUS(stat);
+      if (stat == 0)
+	setboolV(L->top++, 1);
+      else
+	setnilV(L->top++);
+      lua_pushliteral(L, "exit");
+    }
+#else
+    if (stat == 0)
+      setboolV(L->top++, 1);
+    else
+      setnilV(L->top++);
+    lua_pushliteral(L, "exit");
+#endif
+    setintV(L->top++, stat);
+    return 3;
+  }
+  return luaL_fileresult(L, 0, NULL);
+}
+
+/* -- Module registration ------------------------------------------------- */
+
+LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
+				      const char *fname, int szhint)
+{
+  const char *e;
+  lua_pushvalue(L, idx);
+  do {
+    e = strchr(fname, '.');
+    if (e == NULL) e = fname + strlen(fname);
+    lua_pushlstring(L, fname, (size_t)(e - fname));
+    lua_rawget(L, -2);
+    if (lua_isnil(L, -1)) {  /* no such field? */
+      lua_pop(L, 1);  /* remove this nil */
+      lua_createtable(L, 0, (*e == '.' ? 1 : szhint)); /* new table for field */
+      lua_pushlstring(L, fname, (size_t)(e - fname));
+      lua_pushvalue(L, -2);
+      lua_settable(L, -4);  /* set new table into field */
+    } else if (!lua_istable(L, -1)) {  /* field has a non-table value? */
+      lua_pop(L, 2);  /* remove table and value */
+      return fname;  /* return problematic part of the name */
+    }
+    lua_remove(L, -2);  /* remove previous table */
+    fname = e + 1;
+  } while (*e == '.');
+  return NULL;
+}
+
+static int libsize(const luaL_Reg *l)
+{
+  int size = 0;
+  for (; l->name; l++) size++;
+  return size;
+}
+
+LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
+			     const luaL_Reg *l, int nup)
+{
+  lj_lib_checkfpu(L);
+  if (libname) {
+    int size = libsize(l);
+    /* check whether lib already exists */
+    luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+    lua_getfield(L, -1, libname);  /* get _LOADED[libname] */
+    if (!lua_istable(L, -1)) {  /* not found? */
+      lua_pop(L, 1);  /* remove previous result */
+      /* try global variable (and create one if it does not exist) */
+      if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
+	lj_err_callerv(L, LJ_ERR_BADMODN, libname);
+      lua_pushvalue(L, -1);
+      lua_setfield(L, -3, libname);  /* _LOADED[libname] = new table */
+    }
+    lua_remove(L, -2);  /* remove _LOADED table */
+    lua_insert(L, -(nup+1));  /* move library table to below upvalues */
+  }
+  for (; l->name; l++) {
+    int i;
+    for (i = 0; i < nup; i++)  /* copy upvalues to the top */
+      lua_pushvalue(L, -nup);
+    lua_pushcclosure(L, l->func, nup);
+    lua_setfield(L, -(nup+2), l->name);
+  }
+  lua_pop(L, nup);  /* remove upvalues */
+}
+
+LUALIB_API void luaL_register(lua_State *L, const char *libname,
+			      const luaL_Reg *l)
+{
+  luaL_openlib(L, libname, l, 0);
+}
+
+LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
+				 const char *p, const char *r)
+{
+  const char *wild;
+  size_t l = strlen(p);
+  luaL_Buffer b;
+  luaL_buffinit(L, &b);
+  while ((wild = strstr(s, p)) != NULL) {
+    luaL_addlstring(&b, s, (size_t)(wild - s));  /* push prefix */
+    luaL_addstring(&b, r);  /* push replacement in place of pattern */
+    s = wild + l;  /* continue after `p' */
+  }
+  luaL_addstring(&b, s);  /* push last suffix */
+  luaL_pushresult(&b);
+  return lua_tostring(L, -1);
+}
+
+/* -- Buffer handling ----------------------------------------------------- */
+
+#define bufflen(B)	((size_t)((B)->p - (B)->buffer))
+#define bufffree(B)	((size_t)(LUAL_BUFFERSIZE - bufflen(B)))
+
+static int emptybuffer(luaL_Buffer *B)
+{
+  size_t l = bufflen(B);
+  if (l == 0)
+    return 0;  /* put nothing on stack */
+  lua_pushlstring(B->L, B->buffer, l);
+  B->p = B->buffer;
+  B->lvl++;
+  return 1;
+}
+
+static void adjuststack(luaL_Buffer *B)
+{
+  if (B->lvl > 1) {
+    lua_State *L = B->L;
+    int toget = 1;  /* number of levels to concat */
+    size_t toplen = lua_strlen(L, -1);
+    do {
+      size_t l = lua_strlen(L, -(toget+1));
+      if (!(B->lvl - toget + 1 >= LUA_MINSTACK/2 || toplen > l))
+	break;
+      toplen += l;
+      toget++;
+    } while (toget < B->lvl);
+    lua_concat(L, toget);
+    B->lvl = B->lvl - toget + 1;
+  }
+}
+
+LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
+{
+  if (emptybuffer(B))
+    adjuststack(B);
+  return B->buffer;
+}
+
+LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
+{
+  while (l--)
+    luaL_addchar(B, *s++);
+}
+
+LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
+{
+  luaL_addlstring(B, s, strlen(s));
+}
+
+LUALIB_API void luaL_pushresult(luaL_Buffer *B)
+{
+  emptybuffer(B);
+  lua_concat(B->L, B->lvl);
+  B->lvl = 1;
+}
+
+LUALIB_API void luaL_addvalue(luaL_Buffer *B)
+{
+  lua_State *L = B->L;
+  size_t vl;
+  const char *s = lua_tolstring(L, -1, &vl);
+  if (vl <= bufffree(B)) {  /* fit into buffer? */
+    memcpy(B->p, s, vl);  /* put it there */
+    B->p += vl;
+    lua_pop(L, 1);  /* remove from stack */
+  } else {
+    if (emptybuffer(B))
+      lua_insert(L, -2);  /* put buffer before new value */
+    B->lvl++;  /* add new value into B stack */
+    adjuststack(B);
+  }
+}
+
+LUALIB_API void luaL_buffinit(lua_State *L, luaL_Buffer *B)
+{
+  B->L = L;
+  B->p = B->buffer;
+  B->lvl = 0;
+}
+
+/* -- Reference management ------------------------------------------------ */
+
+#define FREELIST_REF	0
+
+/* Convert a stack index to an absolute index. */
+#define abs_index(L, i) \
+  ((i) > 0 || (i) <= LUA_REGISTRYINDEX ? (i) : lua_gettop(L) + (i) + 1)
+
+LUALIB_API int luaL_ref(lua_State *L, int t)
+{
+  int ref;
+  t = abs_index(L, t);
+  if (lua_isnil(L, -1)) {
+    lua_pop(L, 1);  /* remove from stack */
+    return LUA_REFNIL;  /* `nil' has a unique fixed reference */
+  }
+  lua_rawgeti(L, t, FREELIST_REF);  /* get first free element */
+  ref = (int)lua_tointeger(L, -1);  /* ref = t[FREELIST_REF] */
+  lua_pop(L, 1);  /* remove it from stack */
+  if (ref != 0) {  /* any free element? */
+    lua_rawgeti(L, t, ref);  /* remove it from list */
+    lua_rawseti(L, t, FREELIST_REF);  /* (t[FREELIST_REF] = t[ref]) */
+  } else {  /* no free elements */
+    ref = (int)lua_objlen(L, t);
+    ref++;  /* create new reference */
+  }
+  lua_rawseti(L, t, ref);
+  return ref;
+}
+
+LUALIB_API void luaL_unref(lua_State *L, int t, int ref)
+{
+  if (ref >= 0) {
+    t = abs_index(L, t);
+    lua_rawgeti(L, t, FREELIST_REF);
+    lua_rawseti(L, t, ref);  /* t[ref] = t[FREELIST_REF] */
+    lua_pushinteger(L, ref);
+    lua_rawseti(L, t, FREELIST_REF);  /* t[FREELIST_REF] = ref */
+  }
+}
+
+/* -- Default allocator and panic function -------------------------------- */
+
+static int panic(lua_State *L)
+{
+  const char *s = lua_tostring(L, -1);
+  fputs("PANIC: unprotected error in call to Lua API (", stderr);
+  fputs(s ? s : "?", stderr);
+  fputc(')', stderr); fputc('\n', stderr);
+  fflush(stderr);
+  return 0;
+}
+
+#ifdef LUAJIT_USE_SYSMALLOC
+
+#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
+#error "Must use builtin allocator for 64 bit target"
+#endif
+
+static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
+{
+  (void)ud;
+  (void)osize;
+  if (nsize == 0) {
+    free(ptr);
+    return NULL;
+  } else {
+    return realloc(ptr, nsize);
+  }
+}
+
+LUALIB_API lua_State *luaL_newstate(void)
+{
+  lua_State *L = lua_newstate(mem_alloc, NULL);
+  if (L) G(L)->panic = panic;
+  return L;
+}
+
+#else
+
+#include "lj_alloc.h"
+
+LUALIB_API lua_State *luaL_newstate(void)
+{
+  lua_State *L;
+  void *ud = lj_alloc_create();
+  if (ud == NULL) return NULL;
+#if LJ_64
+  L = lj_state_newstate(lj_alloc_f, ud);
+#else
+  L = lua_newstate(lj_alloc_f, ud);
+#endif
+  if (L) G(L)->panic = panic;
+  return L;
+}
+
+#if LJ_64
+LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
+{
+  UNUSED(f); UNUSED(ud);
+  fputs("Must use luaL_newstate() for 64 bit target\n", stderr);
+  return NULL;
+}
+#endif
+
+#endif
+

+ 683 - 0
luajit.mod/luajit/src/lib_base.c

@@ -0,0 +1,683 @@
+/*
+** Base and coroutine library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <stdio.h>
+
+#define lib_base_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_debug.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cconv.h"
+#endif
+#include "lj_bc.h"
+#include "lj_ff.h"
+#include "lj_dispatch.h"
+#include "lj_char.h"
+#include "lj_strscan.h"
+#include "lj_lib.h"
+
+/* -- Base library: checks ------------------------------------------------ */
+
+#define LJLIB_MODULE_base
+
+LJLIB_ASM(assert)		LJLIB_REC(.)
+{
+  GCstr *s;
+  lj_lib_checkany(L, 1);
+  s = lj_lib_optstr(L, 2);
+  if (s)
+    lj_err_callermsg(L, strdata(s));
+  else
+    lj_err_caller(L, LJ_ERR_ASSERT);
+  return FFH_UNREACHABLE;
+}
+
+/* ORDER LJ_T */
+LJLIB_PUSH("nil")
+LJLIB_PUSH("boolean")
+LJLIB_PUSH(top-1)  /* boolean */
+LJLIB_PUSH("userdata")
+LJLIB_PUSH("string")
+LJLIB_PUSH("upval")
+LJLIB_PUSH("thread")
+LJLIB_PUSH("proto")
+LJLIB_PUSH("function")
+LJLIB_PUSH("trace")
+LJLIB_PUSH("cdata")
+LJLIB_PUSH("table")
+LJLIB_PUSH(top-9)  /* userdata */
+LJLIB_PUSH("number")
+LJLIB_ASM_(type)		LJLIB_REC(.)
+/* Recycle the lj_lib_checkany(L, 1) from assert. */
+
+/* -- Base library: iterators --------------------------------------------- */
+
+/* This solves a circular dependency problem -- change FF_next_N as needed. */
+LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
+
+LJLIB_ASM(next)
+{
+  lj_lib_checktab(L, 1);
+  return FFH_UNREACHABLE;
+}
+
+#if LJ_52 || LJ_HASFFI
+static int ffh_pairs(lua_State *L, MMS mm)
+{
+  TValue *o = lj_lib_checkany(L, 1);
+  cTValue *mo = lj_meta_lookup(L, o, mm);
+  if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
+    L->top = o+1;  /* Only keep one argument. */
+    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    return FFH_TAILCALL;
+  } else {
+    if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+    setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
+    if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
+    return FFH_RES(3);
+  }
+}
+#else
+#define ffh_pairs(L, mm)	(lj_lib_checktab(L, 1), FFH_UNREACHABLE)
+#endif
+
+LJLIB_PUSH(lastcl)
+LJLIB_ASM(pairs)
+{
+  return ffh_pairs(L, MM_pairs);
+}
+
+LJLIB_NOREGUV LJLIB_ASM(ipairs_aux)	LJLIB_REC(.)
+{
+  lj_lib_checktab(L, 1);
+  lj_lib_checkint(L, 2);
+  return FFH_UNREACHABLE;
+}
+
+LJLIB_PUSH(lastcl)
+LJLIB_ASM(ipairs)		LJLIB_REC(.)
+{
+  return ffh_pairs(L, MM_ipairs);
+}
+
+/* -- Base library: getters and setters ----------------------------------- */
+
+LJLIB_ASM_(getmetatable)	LJLIB_REC(.)
+/* Recycle the lj_lib_checkany(L, 1) from assert. */
+
+LJLIB_ASM(setmetatable)		LJLIB_REC(.)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  GCtab *mt = lj_lib_checktabornil(L, 2);
+  if (!tvisnil(lj_meta_lookup(L, L->base, MM_metatable)))
+    lj_err_caller(L, LJ_ERR_PROTMT);
+  setgcref(t->metatable, obj2gco(mt));
+  if (mt) { lj_gc_objbarriert(L, t, mt); }
+  settabV(L, L->base-1, t);
+  return FFH_RES(1);
+}
+
+LJLIB_CF(getfenv)
+{
+  GCfunc *fn;
+  cTValue *o = L->base;
+  if (!(o < L->top && tvisfunc(o))) {
+    int level = lj_lib_optint(L, 1, 1);
+    o = lj_debug_frame(L, level, &level);
+    if (o == NULL)
+      lj_err_arg(L, 1, LJ_ERR_INVLVL);
+  }
+  fn = &gcval(o)->fn;
+  settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
+  return 1;
+}
+
+LJLIB_CF(setfenv)
+{
+  GCfunc *fn;
+  GCtab *t = lj_lib_checktab(L, 2);
+  cTValue *o = L->base;
+  if (!(o < L->top && tvisfunc(o))) {
+    int level = lj_lib_checkint(L, 1);
+    if (level == 0) {
+      /* NOBARRIER: A thread (i.e. L) is never black. */
+      setgcref(L->env, obj2gco(t));
+      return 0;
+    }
+    o = lj_debug_frame(L, level, &level);
+    if (o == NULL)
+      lj_err_arg(L, 1, LJ_ERR_INVLVL);
+  }
+  fn = &gcval(o)->fn;
+  if (!isluafunc(fn))
+    lj_err_caller(L, LJ_ERR_SETFENV);
+  setgcref(fn->l.env, obj2gco(t));
+  lj_gc_objbarrier(L, obj2gco(fn), t);
+  setfuncV(L, L->top++, fn);
+  return 1;
+}
+
+LJLIB_ASM(rawget)		LJLIB_REC(.)
+{
+  lj_lib_checktab(L, 1);
+  lj_lib_checkany(L, 2);
+  return FFH_UNREACHABLE;
+}
+
+LJLIB_CF(rawset)		LJLIB_REC(.)
+{
+  lj_lib_checktab(L, 1);
+  lj_lib_checkany(L, 2);
+  L->top = 1+lj_lib_checkany(L, 3);
+  lua_rawset(L, 1);
+  return 1;
+}
+
+LJLIB_CF(rawequal)		LJLIB_REC(.)
+{
+  cTValue *o1 = lj_lib_checkany(L, 1);
+  cTValue *o2 = lj_lib_checkany(L, 2);
+  setboolV(L->top-1, lj_obj_equal(o1, o2));
+  return 1;
+}
+
+#if LJ_52
+LJLIB_CF(rawlen)		LJLIB_REC(.)
+{
+  cTValue *o = L->base;
+  int32_t len;
+  if (L->top > o && tvisstr(o))
+    len = (int32_t)strV(o)->len;
+  else
+    len = (int32_t)lj_tab_len(lj_lib_checktab(L, 1));
+  setintV(L->top-1, len);
+  return 1;
+}
+#endif
+
+LJLIB_CF(unpack)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  int32_t n, i = lj_lib_optint(L, 2, 1);
+  int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ?
+	      lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t);
+  if (i > e) return 0;
+  n = e - i + 1;
+  if (n <= 0 || !lua_checkstack(L, n))
+    lj_err_caller(L, LJ_ERR_UNPACK);
+  do {
+    cTValue *tv = lj_tab_getint(t, i);
+    if (tv) {
+      copyTV(L, L->top++, tv);
+    } else {
+      setnilV(L->top++);
+    }
+  } while (i++ < e);
+  return n;
+}
+
+LJLIB_CF(select)		LJLIB_REC(.)
+{
+  int32_t n = (int32_t)(L->top - L->base);
+  if (n >= 1 && tvisstr(L->base) && *strVdata(L->base) == '#') {
+    setintV(L->top-1, n-1);
+    return 1;
+  } else {
+    int32_t i = lj_lib_checkint(L, 1);
+    if (i < 0) i = n + i; else if (i > n) i = n;
+    if (i < 1)
+      lj_err_arg(L, 1, LJ_ERR_IDXRNG);
+    return n - i;
+  }
+}
+
+/* -- Base library: conversions ------------------------------------------- */
+
+LJLIB_ASM(tonumber)		LJLIB_REC(.)
+{
+  int32_t base = lj_lib_optint(L, 2, 10);
+  if (base == 10) {
+    TValue *o = lj_lib_checkany(L, 1);
+    if (lj_strscan_numberobj(o)) {
+      copyTV(L, L->base-1, o);
+      return FFH_RES(1);
+    }
+#if LJ_HASFFI
+    if (tviscdata(o)) {
+      CTState *cts = ctype_cts(L);
+      CType *ct = lj_ctype_rawref(cts, cdataV(o)->ctypeid);
+      if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
+      if (ctype_isnum(ct->info) || ctype_iscomplex(ct->info)) {
+	if (LJ_DUALNUM && ctype_isinteger_or_bool(ct->info) &&
+	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
+	  int32_t i;
+	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
+	  setintV(L->base-1, i);
+	  return FFH_RES(1);
+	}
+	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
+		       (uint8_t *)&(L->base-1)->n, o, 0);
+	return FFH_RES(1);
+      }
+    }
+#endif
+  } else {
+    const char *p = strdata(lj_lib_checkstr(L, 1));
+    char *ep;
+    unsigned long ul;
+    if (base < 2 || base > 36)
+      lj_err_arg(L, 2, LJ_ERR_BASERNG);
+    ul = strtoul(p, &ep, base);
+    if (p != ep) {
+      while (lj_char_isspace((unsigned char)(*ep))) ep++;
+      if (*ep == '\0') {
+	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
+	  setintV(L->base-1, (int32_t)ul);
+	else
+	  setnumV(L->base-1, (lua_Number)ul);
+	return FFH_RES(1);
+      }
+    }
+  }
+  setnilV(L->base-1);
+  return FFH_RES(1);
+}
+
+LJLIB_PUSH("nil")
+LJLIB_PUSH("false")
+LJLIB_PUSH("true")
+LJLIB_ASM(tostring)		LJLIB_REC(.)
+{
+  TValue *o = lj_lib_checkany(L, 1);
+  cTValue *mo;
+  L->top = o+1;  /* Only keep one argument. */
+  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    return FFH_TAILCALL;
+  } else {
+    GCstr *s;
+    if (tvisnumber(o)) {
+      s = lj_str_fromnumber(L, o);
+    } else if (tvispri(o)) {
+      s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
+    } else {
+      if (tvisfunc(o) && isffunc(funcV(o)))
+	lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
+      else
+	lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
+      /* Note: lua_pushfstring calls the GC which may invalidate o. */
+      s = strV(L->top-1);
+    }
+    setstrV(L, L->base-1, s);
+    return FFH_RES(1);
+  }
+}
+
+/* -- Base library: throw and catch errors -------------------------------- */
+
+LJLIB_CF(error)
+{
+  int32_t level = lj_lib_optint(L, 2, 1);
+  lua_settop(L, 1);
+  if (lua_isstring(L, 1) && level > 0) {
+    luaL_where(L, level);
+    lua_pushvalue(L, 1);
+    lua_concat(L, 2);
+  }
+  return lua_error(L);
+}
+
+LJLIB_ASM(pcall)		LJLIB_REC(.)
+{
+  lj_lib_checkany(L, 1);
+  lj_lib_checkfunc(L, 2);  /* For xpcall only. */
+  return FFH_UNREACHABLE;
+}
+LJLIB_ASM_(xpcall)		LJLIB_REC(.)
+
+/* -- Base library: load Lua code ----------------------------------------- */
+
+static int load_aux(lua_State *L, int status, int envarg)
+{
+  if (status == 0) {
+    if (tvistab(L->base+envarg-1)) {
+      GCfunc *fn = funcV(L->top-1);
+      GCtab *t = tabV(L->base+envarg-1);
+      setgcref(fn->c.env, obj2gco(t));
+      lj_gc_objbarrier(L, fn, t);
+    }
+    return 1;
+  } else {
+    setnilV(L->top-2);
+    return 2;
+  }
+}
+
+LJLIB_CF(loadfile)
+{
+  GCstr *fname = lj_lib_optstr(L, 1);
+  GCstr *mode = lj_lib_optstr(L, 2);
+  int status;
+  lua_settop(L, 3);  /* Ensure env arg exists. */
+  status = luaL_loadfilex(L, fname ? strdata(fname) : NULL,
+			  mode ? strdata(mode) : NULL);
+  return load_aux(L, status, 3);
+}
+
+static const char *reader_func(lua_State *L, void *ud, size_t *size)
+{
+  UNUSED(ud);
+  luaL_checkstack(L, 2, "too many nested functions");
+  copyTV(L, L->top++, L->base);
+  lua_call(L, 0, 1);  /* Call user-supplied function. */
+  L->top--;
+  if (tvisnil(L->top)) {
+    *size = 0;
+    return NULL;
+  } else if (tvisstr(L->top) || tvisnumber(L->top)) {
+    copyTV(L, L->base+4, L->top);  /* Anchor string in reserved stack slot. */
+    return lua_tolstring(L, 5, size);
+  } else {
+    lj_err_caller(L, LJ_ERR_RDRSTR);
+    return NULL;
+  }
+}
+
+LJLIB_CF(load)
+{
+  GCstr *name = lj_lib_optstr(L, 2);
+  GCstr *mode = lj_lib_optstr(L, 3);
+  int status;
+  if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) {
+    GCstr *s = lj_lib_checkstr(L, 1);
+    lua_settop(L, 4);  /* Ensure env arg exists. */
+    status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s),
+			      mode ? strdata(mode) : NULL);
+  } else {
+    lj_lib_checkfunc(L, 1);
+    lua_settop(L, 5);  /* Reserve a slot for the string from the reader. */
+    status = lua_loadx(L, reader_func, NULL, name ? strdata(name) : "=(load)",
+		       mode ? strdata(mode) : NULL);
+  }
+  return load_aux(L, status, 4);
+}
+
+LJLIB_CF(loadstring)
+{
+  return lj_cf_load(L);
+}
+
+LJLIB_CF(dofile)
+{
+  GCstr *fname = lj_lib_optstr(L, 1);
+  setnilV(L->top);
+  L->top = L->base+1;
+  if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0)
+    lua_error(L);
+  lua_call(L, 0, LUA_MULTRET);
+  return (int)(L->top - L->base) - 1;
+}
+
+/* -- Base library: GC control -------------------------------------------- */
+
+LJLIB_CF(gcinfo)
+{
+  setintV(L->top++, (G(L)->gc.total >> 10));
+  return 1;
+}
+
+LJLIB_CF(collectgarbage)
+{
+  int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
+    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+  int32_t data = lj_lib_optint(L, 2, 0);
+  if (opt == LUA_GCCOUNT) {
+    setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
+  } else {
+    int res = lua_gc(L, opt, data);
+    if (opt == LUA_GCSTEP)
+      setboolV(L->top, res);
+    else
+      setintV(L->top, res);
+  }
+  L->top++;
+  return 1;
+}
+
+/* -- Base library: miscellaneous functions ------------------------------- */
+
+LJLIB_PUSH(top-2)  /* Upvalue holds weak table. */
+LJLIB_CF(newproxy)
+{
+  lua_settop(L, 1);
+  lua_newuserdata(L, 0);
+  if (lua_toboolean(L, 1) == 0) {  /* newproxy(): without metatable. */
+    return 1;
+  } else if (lua_isboolean(L, 1)) {  /* newproxy(true): with metatable. */
+    lua_newtable(L);
+    lua_pushvalue(L, -1);
+    lua_pushboolean(L, 1);
+    lua_rawset(L, lua_upvalueindex(1));  /* Remember mt in weak table. */
+  } else {  /* newproxy(proxy): inherit metatable. */
+    int validproxy = 0;
+    if (lua_getmetatable(L, 1)) {
+      lua_rawget(L, lua_upvalueindex(1));
+      validproxy = lua_toboolean(L, -1);
+      lua_pop(L, 1);
+    }
+    if (!validproxy)
+      lj_err_arg(L, 1, LJ_ERR_NOPROXY);
+    lua_getmetatable(L, 1);
+  }
+  lua_setmetatable(L, 2);
+  return 1;
+}
+
+LJLIB_PUSH("tostring")
+LJLIB_CF(print)
+{
+  ptrdiff_t i, nargs = L->top - L->base;
+  cTValue *tv = lj_tab_getstr(tabref(L->env), strV(lj_lib_upvalue(L, 1)));
+  int shortcut;
+  if (tv && !tvisnil(tv)) {
+    copyTV(L, L->top++, tv);
+  } else {
+    setstrV(L, L->top++, strV(lj_lib_upvalue(L, 1)));
+    lua_gettable(L, LUA_GLOBALSINDEX);
+    tv = L->top-1;
+  }
+  shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
+  for (i = 0; i < nargs; i++) {
+    const char *str;
+    size_t size;
+    cTValue *o = &L->base[i];
+    if (shortcut && tvisstr(o)) {
+      str = strVdata(o);
+      size = strV(o)->len;
+    } else if (shortcut && tvisint(o)) {
+      char buf[LJ_STR_INTBUF];
+      char *p = lj_str_bufint(buf, intV(o));
+      size = (size_t)(buf+LJ_STR_INTBUF-p);
+      str = p;
+    } else if (shortcut && tvisnum(o)) {
+      char buf[LJ_STR_NUMBUF];
+      size = lj_str_bufnum(buf, o);
+      str = buf;
+    } else {
+      copyTV(L, L->top+1, o);
+      copyTV(L, L->top, L->top-1);
+      L->top += 2;
+      lua_call(L, 1, 1);
+      str = lua_tolstring(L, -1, &size);
+      if (!str)
+	lj_err_caller(L, LJ_ERR_PRTOSTR);
+      L->top--;
+    }
+    if (i)
+      putchar('\t');
+    fwrite(str, 1, size, stdout);
+  }
+  putchar('\n');
+  return 0;
+}
+
+LJLIB_PUSH(top-3)
+LJLIB_SET(_VERSION)
+
+#include "lj_libdef.h"
+
+/* -- Coroutine library --------------------------------------------------- */
+
+#define LJLIB_MODULE_coroutine
+
+LJLIB_CF(coroutine_status)
+{
+  const char *s;
+  lua_State *co;
+  if (!(L->top > L->base && tvisthread(L->base)))
+    lj_err_arg(L, 1, LJ_ERR_NOCORO);
+  co = threadV(L->base);
+  if (co == L) s = "running";
+  else if (co->status == LUA_YIELD) s = "suspended";
+  else if (co->status != 0) s = "dead";
+  else if (co->base > tvref(co->stack)+1) s = "normal";
+  else if (co->top == co->base) s = "dead";
+  else s = "suspended";
+  lua_pushstring(L, s);
+  return 1;
+}
+
+LJLIB_CF(coroutine_running)
+{
+#if LJ_52
+  int ismain = lua_pushthread(L);
+  setboolV(L->top++, ismain);
+  return 2;
+#else
+  if (lua_pushthread(L))
+    setnilV(L->top++);
+  return 1;
+#endif
+}
+
+LJLIB_CF(coroutine_create)
+{
+  lua_State *L1;
+  if (!(L->base < L->top && tvisfunc(L->base)))
+    lj_err_argt(L, 1, LUA_TFUNCTION);
+  L1 = lua_newthread(L);
+  setfuncV(L, L1->top++, funcV(L->base));
+  return 1;
+}
+
+LJLIB_ASM(coroutine_yield)
+{
+  lj_err_caller(L, LJ_ERR_CYIELD);
+  return FFH_UNREACHABLE;
+}
+
+static int ffh_resume(lua_State *L, lua_State *co, int wrap)
+{
+  if (co->cframe != NULL || co->status > LUA_YIELD ||
+      (co->status == 0 && co->top == co->base)) {
+    ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
+    if (wrap) lj_err_caller(L, em);
+    setboolV(L->base-1, 0);
+    setstrV(L, L->base, lj_err_str(L, em));
+    return FFH_RES(2);
+  }
+  lj_state_growstack(co, (MSize)(L->top - L->base));
+  return FFH_RETRY;
+}
+
+LJLIB_ASM(coroutine_resume)
+{
+  if (!(L->top > L->base && tvisthread(L->base)))
+    lj_err_arg(L, 1, LJ_ERR_NOCORO);
+  return ffh_resume(L, threadV(L->base), 0);
+}
+
+LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux)
+{
+  return ffh_resume(L, threadV(lj_lib_upvalue(L, 1)), 1);
+}
+
+/* Inline declarations. */
+LJ_ASMF void lj_ff_coroutine_wrap_aux(void);
+#if !(LJ_TARGET_MIPS && defined(ljamalg_c))
+LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
+							  lua_State *co);
+#endif
+
+/* Error handler, called from assembler VM. */
+void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
+{
+  co->top--; copyTV(L, L->top, co->top); L->top++;
+  if (tvisstr(L->top-1))
+    lj_err_callermsg(L, strVdata(L->top-1));
+  else
+    lj_err_run(L);
+}
+
+/* Forward declaration. */
+static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
+
+LJLIB_CF(coroutine_wrap)
+{
+  lj_cf_coroutine_create(L);
+  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, funcV(L->top-1));
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+/* Fix the PC of wrap_aux. Really ugly workaround. */
+static void setpc_wrap_aux(lua_State *L, GCfunc *fn)
+{
+  setmref(fn->c.pc, &L2GG(L)->bcff[lj_lib_init_coroutine[1]+2]);
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void newproxy_weaktable(lua_State *L)
+{
+  /* NOBARRIER: The table is new (marked white). */
+  GCtab *t = lj_tab_new(L, 0, 1);
+  settabV(L, L->top++, t);
+  setgcref(t->metatable, obj2gco(t));
+  setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
+	    lj_str_newlit(L, "kv"));
+  t->nomm = (uint8_t)(~(1u<<MM_mode));
+}
+
+LUALIB_API int luaopen_base(lua_State *L)
+{
+  /* NOBARRIER: Table and value are the same. */
+  GCtab *env = tabref(L->env);
+  settabV(L, lj_tab_setstr(L, env, lj_str_newlit(L, "_G")), env);
+  lua_pushliteral(L, LUA_VERSION);  /* top-3. */
+  newproxy_weaktable(L);  /* top-2. */
+  LJ_LIB_REG(L, "_G", base);
+  LJ_LIB_REG(L, LUA_COLIBNAME, coroutine);
+  return 2;
+}
+

+ 74 - 0
luajit.mod/luajit/src/lib_bit.c

@@ -0,0 +1,74 @@
+/*
+** Bit manipulation library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_bit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_bit
+
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
+{
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+
+LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
+{
+  lj_lib_checknumber(L, 1);
+  lj_lib_checkbit(L, 2);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
+LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
+LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
+LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
+
+LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
+{
+  int i = 0;
+  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
+LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
+
+/* ------------------------------------------------------------------------ */
+
+LJLIB_CF(bit_tohex)
+{
+  uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
+  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
+  const char *hexdigits = "0123456789abcdef";
+  char buf[8];
+  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
+  if (n > 8) n = 8;
+  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
+  lua_pushlstring(L, buf, (size_t)n);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_bit(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
+  return 1;
+}
+

+ 405 - 0
luajit.mod/luajit/src/lib_debug.c

@@ -0,0 +1,405 @@
+/*
+** Debug library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_debug_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_debug.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_debug
+
+LJLIB_CF(debug_getregistry)
+{
+  copyTV(L, L->top++, registry(L));
+  return 1;
+}
+
+LJLIB_CF(debug_getmetatable)
+{
+  lj_lib_checkany(L, 1);
+  if (!lua_getmetatable(L, 1)) {
+    setnilV(L->top-1);
+  }
+  return 1;
+}
+
+LJLIB_CF(debug_setmetatable)
+{
+  lj_lib_checktabornil(L, 2);
+  L->top = L->base+2;
+  lua_setmetatable(L, 1);
+#if !LJ_52
+  setboolV(L->top-1, 1);
+#endif
+  return 1;
+}
+
+LJLIB_CF(debug_getfenv)
+{
+  lj_lib_checkany(L, 1);
+  lua_getfenv(L, 1);
+  return 1;
+}
+
+LJLIB_CF(debug_setfenv)
+{
+  lj_lib_checktab(L, 2);
+  L->top = L->base+2;
+  if (!lua_setfenv(L, 1))
+    lj_err_caller(L, LJ_ERR_SETFENV);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void settabss(lua_State *L, const char *i, const char *v)
+{
+  lua_pushstring(L, v);
+  lua_setfield(L, -2, i);
+}
+
+static void settabsi(lua_State *L, const char *i, int v)
+{
+  lua_pushinteger(L, v);
+  lua_setfield(L, -2, i);
+}
+
+static void settabsb(lua_State *L, const char *i, int v)
+{
+  lua_pushboolean(L, v);
+  lua_setfield(L, -2, i);
+}
+
+static lua_State *getthread(lua_State *L, int *arg)
+{
+  if (L->base < L->top && tvisthread(L->base)) {
+    *arg = 1;
+    return threadV(L->base);
+  } else {
+    *arg = 0;
+    return L;
+  }
+}
+
+static void treatstackoption(lua_State *L, lua_State *L1, const char *fname)
+{
+  if (L == L1) {
+    lua_pushvalue(L, -2);
+    lua_remove(L, -3);
+  }
+  else
+    lua_xmove(L1, L, 1);
+  lua_setfield(L, -2, fname);
+}
+
+LJLIB_CF(debug_getinfo)
+{
+  lj_Debug ar;
+  int arg, opt_f = 0, opt_L = 0;
+  lua_State *L1 = getthread(L, &arg);
+  const char *options = luaL_optstring(L, arg+2, "flnSu");
+  if (lua_isnumber(L, arg+1)) {
+    if (!lua_getstack(L1, (int)lua_tointeger(L, arg+1), (lua_Debug *)&ar)) {
+      setnilV(L->top-1);
+      return 1;
+    }
+  } else if (L->base+arg < L->top && tvisfunc(L->base+arg)) {
+    options = lua_pushfstring(L, ">%s", options);
+    setfuncV(L1, L1->top++, funcV(L->base+arg));
+  } else {
+    lj_err_arg(L, arg+1, LJ_ERR_NOFUNCL);
+  }
+  if (!lj_debug_getinfo(L1, options, &ar, 1))
+    lj_err_arg(L, arg+2, LJ_ERR_INVOPT);
+  lua_createtable(L, 0, 16);  /* Create result table. */
+  for (; *options; options++) {
+    switch (*options) {
+    case 'S':
+      settabss(L, "source", ar.source);
+      settabss(L, "short_src", ar.short_src);
+      settabsi(L, "linedefined", ar.linedefined);
+      settabsi(L, "lastlinedefined", ar.lastlinedefined);
+      settabss(L, "what", ar.what);
+      break;
+    case 'l':
+      settabsi(L, "currentline", ar.currentline);
+      break;
+    case 'u':
+      settabsi(L, "nups", ar.nups);
+      settabsi(L, "nparams", ar.nparams);
+      settabsb(L, "isvararg", ar.isvararg);
+      break;
+    case 'n':
+      settabss(L, "name", ar.name);
+      settabss(L, "namewhat", ar.namewhat);
+      break;
+    case 'f': opt_f = 1; break;
+    case 'L': opt_L = 1; break;
+    default: break;
+    }
+  }
+  if (opt_L) treatstackoption(L, L1, "activelines");
+  if (opt_f) treatstackoption(L, L1, "func");
+  return 1;  /* Return result table. */
+}
+
+LJLIB_CF(debug_getlocal)
+{
+  int arg;
+  lua_State *L1 = getthread(L, &arg);
+  lua_Debug ar;
+  const char *name;
+  int slot = lj_lib_checkint(L, arg+2);
+  if (tvisfunc(L->base+arg)) {
+    L->top = L->base+arg+1;
+    lua_pushstring(L, lua_getlocal(L, NULL, slot));
+    return 1;
+  }
+  if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar))
+    lj_err_arg(L, arg+1, LJ_ERR_LVLRNG);
+  name = lua_getlocal(L1, &ar, slot);
+  if (name) {
+    lua_xmove(L1, L, 1);
+    lua_pushstring(L, name);
+    lua_pushvalue(L, -2);
+    return 2;
+  } else {
+    setnilV(L->top-1);
+    return 1;
+  }
+}
+
+LJLIB_CF(debug_setlocal)
+{
+  int arg;
+  lua_State *L1 = getthread(L, &arg);
+  lua_Debug ar;
+  TValue *tv;
+  if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar))
+    lj_err_arg(L, arg+1, LJ_ERR_LVLRNG);
+  tv = lj_lib_checkany(L, arg+3);
+  copyTV(L1, L1->top++, tv);
+  lua_pushstring(L, lua_setlocal(L1, &ar, lj_lib_checkint(L, arg+2)));
+  return 1;
+}
+
+static int debug_getupvalue(lua_State *L, int get)
+{
+  int32_t n = lj_lib_checkint(L, 2);
+  const char *name;
+  lj_lib_checkfunc(L, 1);
+  name = get ? lua_getupvalue(L, 1, n) : lua_setupvalue(L, 1, n);
+  if (name) {
+    lua_pushstring(L, name);
+    if (!get) return 1;
+    copyTV(L, L->top, L->top-2);
+    L->top++;
+    return 2;
+  }
+  return 0;
+}
+
+LJLIB_CF(debug_getupvalue)
+{
+  return debug_getupvalue(L, 1);
+}
+
+LJLIB_CF(debug_setupvalue)
+{
+  lj_lib_checkany(L, 3);
+  return debug_getupvalue(L, 0);
+}
+
+LJLIB_CF(debug_upvalueid)
+{
+  GCfunc *fn = lj_lib_checkfunc(L, 1);
+  int32_t n = lj_lib_checkint(L, 2) - 1;
+  if ((uint32_t)n >= fn->l.nupvalues)
+    lj_err_arg(L, 2, LJ_ERR_IDXRNG);
+  setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+					(void *)&fn->c.upvalue[n]);
+  return 1;
+}
+
+LJLIB_CF(debug_upvaluejoin)
+{
+  GCfunc *fn[2];
+  GCRef *p[2];
+  int i;
+  for (i = 0; i < 2; i++) {
+    int32_t n;
+    fn[i] = lj_lib_checkfunc(L, 2*i+1);
+    if (!isluafunc(fn[i]))
+      lj_err_arg(L, 2*i+1, LJ_ERR_NOLFUNC);
+    n = lj_lib_checkint(L, 2*i+2) - 1;
+    if ((uint32_t)n >= fn[i]->l.nupvalues)
+      lj_err_arg(L, 2*i+2, LJ_ERR_IDXRNG);
+    p[i] = &fn[i]->l.uvptr[n];
+  }
+  setgcrefr(*p[0], *p[1]);
+  lj_gc_objbarrier(L, fn[0], gcref(*p[1]));
+  return 0;
+}
+
+#if LJ_52
+LJLIB_CF(debug_getuservalue)
+{
+  TValue *o = L->base;
+  if (o < L->top && tvisudata(o))
+    settabV(L, o, tabref(udataV(o)->env));
+  else
+    setnilV(o);
+  L->top = o+1;
+  return 1;
+}
+
+LJLIB_CF(debug_setuservalue)
+{
+  TValue *o = L->base;
+  if (!(o < L->top && tvisudata(o)))
+    lj_err_argt(L, 1, LUA_TUSERDATA);
+  if (!(o+1 < L->top && tvistab(o+1)))
+    lj_err_argt(L, 2, LUA_TTABLE);
+  L->top = o+2;
+  lua_setfenv(L, 1);
+  return 1;
+}
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+static const char KEY_HOOK = 'h';
+
+static void hookf(lua_State *L, lua_Debug *ar)
+{
+  static const char *const hooknames[] =
+    {"call", "return", "line", "count", "tail return"};
+  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+  lua_rawget(L, LUA_REGISTRYINDEX);
+  if (lua_isfunction(L, -1)) {
+    lua_pushstring(L, hooknames[(int)ar->event]);
+    if (ar->currentline >= 0)
+      lua_pushinteger(L, ar->currentline);
+    else lua_pushnil(L);
+    lua_call(L, 2, 0);
+  }
+}
+
+static int makemask(const char *smask, int count)
+{
+  int mask = 0;
+  if (strchr(smask, 'c')) mask |= LUA_MASKCALL;
+  if (strchr(smask, 'r')) mask |= LUA_MASKRET;
+  if (strchr(smask, 'l')) mask |= LUA_MASKLINE;
+  if (count > 0) mask |= LUA_MASKCOUNT;
+  return mask;
+}
+
+static char *unmakemask(int mask, char *smask)
+{
+  int i = 0;
+  if (mask & LUA_MASKCALL) smask[i++] = 'c';
+  if (mask & LUA_MASKRET) smask[i++] = 'r';
+  if (mask & LUA_MASKLINE) smask[i++] = 'l';
+  smask[i] = '\0';
+  return smask;
+}
+
+LJLIB_CF(debug_sethook)
+{
+  int arg, mask, count;
+  lua_Hook func;
+  (void)getthread(L, &arg);
+  if (lua_isnoneornil(L, arg+1)) {
+    lua_settop(L, arg+1);
+    func = NULL; mask = 0; count = 0;  /* turn off hooks */
+  } else {
+    const char *smask = luaL_checkstring(L, arg+2);
+    luaL_checktype(L, arg+1, LUA_TFUNCTION);
+    count = luaL_optint(L, arg+3, 0);
+    func = hookf; mask = makemask(smask, count);
+  }
+  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+  lua_pushvalue(L, arg+1);
+  lua_rawset(L, LUA_REGISTRYINDEX);
+  lua_sethook(L, func, mask, count);
+  return 0;
+}
+
+LJLIB_CF(debug_gethook)
+{
+  char buff[5];
+  int mask = lua_gethookmask(L);
+  lua_Hook hook = lua_gethook(L);
+  if (hook != NULL && hook != hookf) {  /* external hook? */
+    lua_pushliteral(L, "external hook");
+  } else {
+    lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+    lua_rawget(L, LUA_REGISTRYINDEX);   /* get hook */
+  }
+  lua_pushstring(L, unmakemask(mask, buff));
+  lua_pushinteger(L, lua_gethookcount(L));
+  return 3;
+}
+
+/* ------------------------------------------------------------------------ */
+
+LJLIB_CF(debug_debug)
+{
+  for (;;) {
+    char buffer[250];
+    fputs("lua_debug> ", stderr);
+    if (fgets(buffer, sizeof(buffer), stdin) == 0 ||
+	strcmp(buffer, "cont\n") == 0)
+      return 0;
+    if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") ||
+	lua_pcall(L, 0, 0, 0)) {
+      fputs(lua_tostring(L, -1), stderr);
+      fputs("\n", stderr);
+    }
+    lua_settop(L, 0);  /* remove eventual returns */
+  }
+}
+
+/* ------------------------------------------------------------------------ */
+
+#define LEVELS1	12	/* size of the first part of the stack */
+#define LEVELS2	10	/* size of the second part of the stack */
+
+LJLIB_CF(debug_traceback)
+{
+  int arg;
+  lua_State *L1 = getthread(L, &arg);
+  const char *msg = lua_tostring(L, arg+1);
+  if (msg == NULL && L->top > L->base+arg)
+    L->top = L->base+arg+1;
+  else
+    luaL_traceback(L, L1, msg, lj_lib_optint(L, arg+2, (L == L1)));
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_debug(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_DBLIBNAME, debug);
+  return 1;
+}
+

+ 851 - 0
luajit.mod/luajit/src/lib_ffi.c

@@ -0,0 +1,851 @@
+/*
+** FFI library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_ffi_c
+#define LUA_LIB
+
+#include <errno.h>
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+
+#if LJ_HASFFI
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_meta.h"
+#include "lj_ctype.h"
+#include "lj_cparse.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#include "lj_ccall.h"
+#include "lj_ccallback.h"
+#include "lj_clib.h"
+#include "lj_ff.h"
+#include "lj_lib.h"
+
+/* -- C type checks ------------------------------------------------------- */
+
+/* Check first argument for a C type and returns its ID. */
+static CTypeID ffi_checkctype(lua_State *L, CTState *cts, TValue *param)
+{
+  TValue *o = L->base;
+  if (!(o < L->top)) {
+  err_argtype:
+    lj_err_argtype(L, 1, "C type");
+  }
+  if (tvisstr(o)) {  /* Parse an abstract C type declaration. */
+    GCstr *s = strV(o);
+    CPState cp;
+    int errcode;
+    cp.L = L;
+    cp.cts = cts;
+    cp.srcname = strdata(s);
+    cp.p = strdata(s);
+    cp.param = param;
+    cp.mode = CPARSE_MODE_ABSTRACT|CPARSE_MODE_NOIMPLICIT;
+    errcode = lj_cparse(&cp);
+    if (errcode) lj_err_throw(L, errcode);  /* Propagate errors. */
+    return cp.val.id;
+  } else {
+    GCcdata *cd;
+    if (!tviscdata(o)) goto err_argtype;
+    if (param && param < L->top) lj_err_arg(L, 1, LJ_ERR_FFI_NUMPARAM);
+    cd = cdataV(o);
+    return cd->ctypeid == CTID_CTYPEID ? *(CTypeID *)cdataptr(cd) : cd->ctypeid;
+  }
+}
+
+/* Check argument for C data and return it. */
+static GCcdata *ffi_checkcdata(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;
+  if (!(o < L->top && tviscdata(o)))
+    lj_err_argt(L, narg, LUA_TCDATA);
+  return cdataV(o);
+}
+
+/* Convert argument to C pointer. */
+static void *ffi_checkptr(lua_State *L, int narg, CTypeID id)
+{
+  CTState *cts = ctype_cts(L);
+  TValue *o = L->base + narg-1;
+  void *p;
+  if (o >= L->top)
+    lj_err_arg(L, narg, LJ_ERR_NOVAL);
+  lj_cconv_ct_tv(cts, ctype_get(cts, id), (uint8_t *)&p, o, CCF_ARG(narg));
+  return p;
+}
+
+/* Convert argument to int32_t. */
+static int32_t ffi_checkint(lua_State *L, int narg)
+{
+  CTState *cts = ctype_cts(L);
+  TValue *o = L->base + narg-1;
+  int32_t i;
+  if (o >= L->top)
+    lj_err_arg(L, narg, LJ_ERR_NOVAL);
+  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o,
+		 CCF_ARG(narg));
+  return i;
+}
+
+/* -- C type metamethods -------------------------------------------------- */
+
+#define LJLIB_MODULE_ffi_meta
+
+/* Handle ctype __index/__newindex metamethods. */
+static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
+{
+  CTypeID id = ctype_typeid(cts, ct);
+  cTValue *tv = lj_ctype_meta(cts, id, mm);
+  TValue *base = L->base;
+  if (!tv) {
+    const char *s;
+  err_index:
+    s = strdata(lj_ctype_repr(L, id, NULL));
+    if (tvisstr(L->base+1)) {
+      lj_err_callerv(L, LJ_ERR_FFI_BADMEMBER, s, strVdata(L->base+1));
+    } else {
+      const char *key = tviscdata(L->base+1) ?
+	strdata(lj_ctype_repr(L, cdataV(L->base+1)->ctypeid, NULL)) :
+	lj_typename(L->base+1);
+      lj_err_callerv(L, LJ_ERR_FFI_BADIDXW, s, key);
+    }
+  }
+  if (!tvisfunc(tv)) {
+    if (mm == MM_index) {
+      cTValue *o = lj_meta_tget(L, tv, base+1);
+      if (o) {
+	if (tvisnil(o)) goto err_index;
+	copyTV(L, L->top-1, o);
+	return 1;
+      }
+    } else {
+      TValue *o = lj_meta_tset(L, tv, base+1);
+      if (o) {
+	copyTV(L, o, base+2);
+	return 0;
+      }
+    }
+    copyTV(L, base, L->top);
+    tv = L->top-1;
+  }
+  return lj_meta_tailcall(L, tv);
+}
+
+LJLIB_CF(ffi_meta___index)	LJLIB_REC(cdata_index 0)
+{
+  CTState *cts = ctype_cts(L);
+  CTInfo qual = 0;
+  CType *ct;
+  uint8_t *p;
+  TValue *o = L->base;
+  if (!(o+1 < L->top && tviscdata(o)))  /* Also checks for presence of key. */
+    lj_err_argt(L, 1, LUA_TCDATA);
+  ct = lj_cdata_index(cts, cdataV(o), o+1, &p, &qual);
+  if ((qual & 1))
+    return ffi_index_meta(L, cts, ct, MM_index);
+  if (lj_cdata_get(cts, ct, L->top-1, p))
+    lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(ffi_meta___newindex)	LJLIB_REC(cdata_index 1)
+{
+  CTState *cts = ctype_cts(L);
+  CTInfo qual = 0;
+  CType *ct;
+  uint8_t *p;
+  TValue *o = L->base;
+  if (!(o+2 < L->top && tviscdata(o)))  /* Also checks for key and value. */
+    lj_err_argt(L, 1, LUA_TCDATA);
+  ct = lj_cdata_index(cts, cdataV(o), o+1, &p, &qual);
+  if ((qual & 1)) {
+    if ((qual & CTF_CONST))
+      lj_err_caller(L, LJ_ERR_FFI_WRCONST);
+    return ffi_index_meta(L, cts, ct, MM_newindex);
+  }
+  lj_cdata_set(cts, ct, p, o+2, qual);
+  return 0;
+}
+
+/* Common handler for cdata arithmetic. */
+static int ffi_arith(lua_State *L)
+{
+  MMS mm = (MMS)(curr_func(L)->c.ffid - (int)FF_ffi_meta___eq + (int)MM_eq);
+  return lj_carith_op(L, mm);
+}
+
+/* The following functions must be in contiguous ORDER MM. */
+LJLIB_CF(ffi_meta___eq)		LJLIB_REC(cdata_arith MM_eq)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___len)	LJLIB_REC(cdata_arith MM_len)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___lt)		LJLIB_REC(cdata_arith MM_lt)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___le)		LJLIB_REC(cdata_arith MM_le)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___concat)	LJLIB_REC(cdata_arith MM_concat)
+{
+  return ffi_arith(L);
+}
+
+/* Forward declaration. */
+static int lj_cf_ffi_new(lua_State *L);
+
+LJLIB_CF(ffi_meta___call)	LJLIB_REC(cdata_call)
+{
+  CTState *cts = ctype_cts(L);
+  GCcdata *cd = ffi_checkcdata(L, 1);
+  CTypeID id = cd->ctypeid;
+  CType *ct;
+  cTValue *tv;
+  MMS mm = MM_call;
+  if (cd->ctypeid == CTID_CTYPEID) {
+    id = *(CTypeID *)cdataptr(cd);
+    mm = MM_new;
+  } else {
+    int ret = lj_ccall_func(L, cd);
+    if (ret >= 0)
+      return ret;
+  }
+  /* Handle ctype __call/__new metamethod. */
+  ct = ctype_raw(cts, id);
+  if (ctype_isptr(ct->info)) id = ctype_cid(ct->info);
+  tv = lj_ctype_meta(cts, id, mm);
+  if (tv)
+    return lj_meta_tailcall(L, tv);
+  else if (mm == MM_call)
+    lj_err_callerv(L, LJ_ERR_FFI_BADCALL, strdata(lj_ctype_repr(L, id, NULL)));
+  return lj_cf_ffi_new(L);
+}
+
+LJLIB_CF(ffi_meta___add)	LJLIB_REC(cdata_arith MM_add)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___sub)	LJLIB_REC(cdata_arith MM_sub)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___mul)	LJLIB_REC(cdata_arith MM_mul)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___div)	LJLIB_REC(cdata_arith MM_div)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___mod)	LJLIB_REC(cdata_arith MM_mod)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___pow)	LJLIB_REC(cdata_arith MM_pow)
+{
+  return ffi_arith(L);
+}
+
+LJLIB_CF(ffi_meta___unm)	LJLIB_REC(cdata_arith MM_unm)
+{
+  return ffi_arith(L);
+}
+/* End of contiguous ORDER MM. */
+
+LJLIB_CF(ffi_meta___tostring)
+{
+  GCcdata *cd = ffi_checkcdata(L, 1);
+  const char *msg = "cdata<%s>: %p";
+  CTypeID id = cd->ctypeid;
+  void *p = cdataptr(cd);
+  if (id == CTID_CTYPEID) {
+    msg = "ctype<%s>";
+    id = *(CTypeID *)p;
+  } else {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_raw(cts, id);
+    if (ctype_isref(ct->info)) {
+      p = *(void **)p;
+      ct = ctype_rawchild(cts, ct);
+    }
+    if (ctype_iscomplex(ct->info)) {
+      setstrV(L, L->top-1, lj_ctype_repr_complex(L, cdataptr(cd), ct->size));
+      goto checkgc;
+    } else if (ct->size == 8 && ctype_isinteger(ct->info)) {
+      setstrV(L, L->top-1, lj_ctype_repr_int64(L, *(uint64_t *)cdataptr(cd),
+					       (ct->info & CTF_UNSIGNED)));
+      goto checkgc;
+    } else if (ctype_isfunc(ct->info)) {
+      p = *(void **)p;
+    } else if (ctype_isenum(ct->info)) {
+      msg = "cdata<%s>: %d";
+      p = (void *)(uintptr_t)*(uint32_t **)p;
+    } else {
+      if (ctype_isptr(ct->info)) {
+	p = cdata_getptr(p, ct->size);
+	ct = ctype_rawchild(cts, ct);
+      }
+      if (ctype_isstruct(ct->info) || ctype_isvector(ct->info)) {
+	/* Handle ctype __tostring metamethod. */
+	cTValue *tv = lj_ctype_meta(cts, ctype_typeid(cts, ct), MM_tostring);
+	if (tv)
+	  return lj_meta_tailcall(L, tv);
+      }
+    }
+  }
+  lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+checkgc:
+  lj_gc_check(L);
+  return 1;
+}
+
+static int ffi_pairs(lua_State *L, MMS mm)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkcdata(L, 1)->ctypeid;
+  CType *ct = ctype_raw(cts, id);
+  cTValue *tv;
+  if (ctype_isptr(ct->info)) id = ctype_cid(ct->info);
+  tv = lj_ctype_meta(cts, id, mm);
+  if (!tv)
+    lj_err_callerv(L, LJ_ERR_FFI_BADMM, strdata(lj_ctype_repr(L, id, NULL)),
+		   strdata(mmname_str(G(L), mm)));
+  return lj_meta_tailcall(L, tv);
+}
+
+LJLIB_CF(ffi_meta___pairs)
+{
+  return ffi_pairs(L, MM_pairs);
+}
+
+LJLIB_CF(ffi_meta___ipairs)
+{
+  return ffi_pairs(L, MM_ipairs);
+}
+
+LJLIB_PUSH("ffi") LJLIB_SET(__metatable)
+
+#include "lj_libdef.h"
+
+/* -- C library metamethods ----------------------------------------------- */
+
+#define LJLIB_MODULE_ffi_clib
+
+/* Index C library by a name. */
+static TValue *ffi_clib_index(lua_State *L)
+{
+  TValue *o = L->base;
+  CLibrary *cl;
+  if (!(o < L->top && tvisudata(o) && udataV(o)->udtype == UDTYPE_FFI_CLIB))
+    lj_err_argt(L, 1, LUA_TUSERDATA);
+  cl = (CLibrary *)uddata(udataV(o));
+  if (!(o+1 < L->top && tvisstr(o+1)))
+    lj_err_argt(L, 2, LUA_TSTRING);
+  return lj_clib_index(L, cl, strV(o+1));
+}
+
+LJLIB_CF(ffi_clib___index)	LJLIB_REC(clib_index 1)
+{
+  TValue *tv = ffi_clib_index(L);
+  if (tviscdata(tv)) {
+    CTState *cts = ctype_cts(L);
+    GCcdata *cd = cdataV(tv);
+    CType *s = ctype_get(cts, cd->ctypeid);
+    if (ctype_isextern(s->info)) {
+      CTypeID sid = ctype_cid(s->info);
+      void *sp = *(void **)cdataptr(cd);
+      CType *ct = ctype_raw(cts, sid);
+      if (lj_cconv_tv_ct(cts, ct, sid, L->top-1, sp))
+	lj_gc_check(L);
+      return 1;
+    }
+  }
+  copyTV(L, L->top-1, tv);
+  return 1;
+}
+
+LJLIB_CF(ffi_clib___newindex)	LJLIB_REC(clib_index 0)
+{
+  TValue *tv = ffi_clib_index(L);
+  TValue *o = L->base+2;
+  if (o < L->top && tviscdata(tv)) {
+    CTState *cts = ctype_cts(L);
+    GCcdata *cd = cdataV(tv);
+    CType *d = ctype_get(cts, cd->ctypeid);
+    if (ctype_isextern(d->info)) {
+      CTInfo qual = 0;
+      for (;;) {  /* Skip attributes and collect qualifiers. */
+	d = ctype_child(cts, d);
+	if (!ctype_isattrib(d->info)) break;
+	if (ctype_attrib(d->info) == CTA_QUAL) qual |= d->size;
+      }
+      if (!((d->info|qual) & CTF_CONST)) {
+	lj_cconv_ct_tv(cts, d, *(void **)cdataptr(cd), o, 0);
+	return 0;
+      }
+    }
+  }
+  lj_err_caller(L, LJ_ERR_FFI_WRCONST);
+  return 0;  /* unreachable */
+}
+
+LJLIB_CF(ffi_clib___gc)
+{
+  TValue *o = L->base;
+  if (o < L->top && tvisudata(o) && udataV(o)->udtype == UDTYPE_FFI_CLIB)
+    lj_clib_unload((CLibrary *)uddata(udataV(o)));
+  return 0;
+}
+
+#include "lj_libdef.h"
+
+/* -- Callback function metamethods --------------------------------------- */
+
+#define LJLIB_MODULE_ffi_callback
+
+static int ffi_callback_set(lua_State *L, GCfunc *fn)
+{
+  GCcdata *cd = ffi_checkcdata(L, 1);
+  CTState *cts = ctype_cts(L);
+  CType *ct = ctype_raw(cts, cd->ctypeid);
+  if (ctype_isptr(ct->info) && (LJ_32 || ct->size == 8)) {
+    MSize slot = lj_ccallback_ptr2slot(cts, *(void **)cdataptr(cd));
+    if (slot < cts->cb.sizeid && cts->cb.cbid[slot] != 0) {
+      GCtab *t = cts->miscmap;
+      TValue *tv = lj_tab_setint(L, t, (int32_t)slot);
+      if (fn) {
+	setfuncV(L, tv, fn);
+	lj_gc_anybarriert(L, t);
+      } else {
+	setnilV(tv);
+	cts->cb.cbid[slot] = 0;
+	cts->cb.topid = slot < cts->cb.topid ? slot : cts->cb.topid;
+      }
+      return 0;
+    }
+  }
+  lj_err_caller(L, LJ_ERR_FFI_BADCBACK);
+  return 0;
+}
+
+LJLIB_CF(ffi_callback_free)
+{
+  return ffi_callback_set(L, NULL);
+}
+
+LJLIB_CF(ffi_callback_set)
+{
+  GCfunc *fn = lj_lib_checkfunc(L, 2);
+  return ffi_callback_set(L, fn);
+}
+
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+#include "lj_libdef.h"
+
+/* -- FFI library functions ----------------------------------------------- */
+
+#define LJLIB_MODULE_ffi
+
+LJLIB_CF(ffi_cdef)
+{
+  GCstr *s = lj_lib_checkstr(L, 1);
+  CPState cp;
+  int errcode;
+  cp.L = L;
+  cp.cts = ctype_cts(L);
+  cp.srcname = strdata(s);
+  cp.p = strdata(s);
+  cp.param = L->base+1;
+  cp.mode = CPARSE_MODE_MULTI|CPARSE_MODE_DIRECT;
+  errcode = lj_cparse(&cp);
+  if (errcode) lj_err_throw(L, errcode);  /* Propagate errors. */
+  lj_gc_check(L);
+  return 0;
+}
+
+LJLIB_CF(ffi_new)	LJLIB_REC(.)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, NULL);
+  CType *ct = ctype_raw(cts, id);
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  TValue *o = L->base+1;
+  GCcdata *cd;
+  if ((info & CTF_VLA)) {
+    o++;
+    sz = lj_ctype_vlsize(cts, ct, (CTSize)ffi_checkint(L, 2));
+  }
+  if (sz == CTSIZE_INVALID)
+    lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
+  if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
+    cd = lj_cdata_new(cts, id, sz);
+  else
+    cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+  setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
+  lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
+		   o, (MSize)(L->top - o));  /* Initialize cdata. */
+  if (ctype_isstruct(ct->info)) {
+    /* Handle ctype __gc metamethod. Use the fast lookup here. */
+    cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
+    if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
+      GCtab *t = cts->finalizer;
+      if (gcref(t->metatable)) {
+	/* Add to finalizer table, if still enabled. */
+	copyTV(L, lj_tab_set(L, t, o-1), tv);
+	lj_gc_anybarriert(L, t);
+	cd->marked |= LJ_GC_CDATA_FIN;
+      }
+    }
+  }
+  L->top = o;  /* Only return the cdata itself. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(ffi_cast)	LJLIB_REC(ffi_new)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, NULL);
+  CType *d = ctype_raw(cts, id);
+  TValue *o = lj_lib_checkany(L, 2);
+  L->top = o+1;  /* Make sure this is the last item on the stack. */
+  if (!(ctype_isnum(d->info) || ctype_isptr(d->info) || ctype_isenum(d->info)))
+    lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
+  if (!(tviscdata(o) && cdataV(o)->ctypeid == id)) {
+    GCcdata *cd = lj_cdata_new(cts, id, d->size);
+    lj_cconv_ct_tv(cts, d, cdataptr(cd), o, CCF_CAST);
+    setcdataV(L, o, cd);
+    lj_gc_check(L);
+  }
+  return 1;
+}
+
+LJLIB_CF(ffi_typeof)	LJLIB_REC(.)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, L->base+1);
+  GCcdata *cd = lj_cdata_new(cts, CTID_CTYPEID, 4);
+  *(CTypeID *)cdataptr(cd) = id;
+  setcdataV(L, L->top-1, cd);
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(ffi_istype)	LJLIB_REC(.)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id1 = ffi_checkctype(L, cts, NULL);
+  TValue *o = lj_lib_checkany(L, 2);
+  int b = 0;
+  if (tviscdata(o)) {
+    GCcdata *cd = cdataV(o);
+    CTypeID id2 = cd->ctypeid == CTID_CTYPEID ? *(CTypeID *)cdataptr(cd) :
+						cd->ctypeid;
+    CType *ct1 = lj_ctype_rawref(cts, id1);
+    CType *ct2 = lj_ctype_rawref(cts, id2);
+    if (ct1 == ct2) {
+      b = 1;
+    } else if (ctype_type(ct1->info) == ctype_type(ct2->info) &&
+	       ct1->size == ct2->size) {
+      if (ctype_ispointer(ct1->info))
+	b = lj_cconv_compatptr(cts, ct1, ct2, CCF_IGNQUAL);
+      else if (ctype_isnum(ct1->info) || ctype_isvoid(ct1->info))
+	b = (((ct1->info ^ ct2->info) & ~(CTF_QUAL|CTF_LONG)) == 0);
+    } else if (ctype_isstruct(ct1->info) && ctype_isptr(ct2->info) &&
+	       ct1 == ctype_rawchild(cts, ct2)) {
+      b = 1;
+    }
+  }
+  setboolV(L->top-1, b);
+  setboolV(&G(L)->tmptv2, b);  /* Remember for trace recorder. */
+  return 1;
+}
+
+LJLIB_CF(ffi_sizeof)	LJLIB_REC(ffi_xof FF_ffi_sizeof)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, NULL);
+  CTSize sz;
+  if (LJ_UNLIKELY(tviscdata(L->base) && cdataisv(cdataV(L->base)))) {
+    sz = cdatavlen(cdataV(L->base));
+  } else {
+    CType *ct = lj_ctype_rawref(cts, id);
+    if (ctype_isvltype(ct->info))
+      sz = lj_ctype_vlsize(cts, ct, (CTSize)ffi_checkint(L, 2));
+    else
+      sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_INVALID;
+    if (LJ_UNLIKELY(sz == CTSIZE_INVALID)) {
+      setnilV(L->top-1);
+      return 1;
+    }
+  }
+  setintV(L->top-1, (int32_t)sz);
+  return 1;
+}
+
+LJLIB_CF(ffi_alignof)	LJLIB_REC(ffi_xof FF_ffi_alignof)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, NULL);
+  CTSize sz = 0;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  setintV(L->top-1, 1 << ctype_align(info));
+  return 1;
+}
+
+LJLIB_CF(ffi_offsetof)	LJLIB_REC(ffi_xof FF_ffi_offsetof)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, NULL);
+  GCstr *name = lj_lib_checkstr(L, 2);
+  CType *ct = lj_ctype_rawref(cts, id);
+  CTSize ofs;
+  if (ctype_isstruct(ct->info) && ct->size != CTSIZE_INVALID) {
+    CType *fct = lj_ctype_getfield(cts, ct, name, &ofs);
+    if (fct) {
+      setintV(L->top-1, ofs);
+      if (ctype_isfield(fct->info)) {
+	return 1;
+      } else if (ctype_isbitfield(fct->info)) {
+	setintV(L->top++, ctype_bitpos(fct->info));
+	setintV(L->top++, ctype_bitbsz(fct->info));
+	return 3;
+      }
+    }
+  }
+  return 0;
+}
+
+LJLIB_CF(ffi_errno)	LJLIB_REC(.)
+{
+  int err = errno;
+  if (L->top > L->base)
+    errno = ffi_checkint(L, 1);
+  setintV(L->top++, err);
+  return 1;
+}
+
+LJLIB_CF(ffi_string)	LJLIB_REC(.)
+{
+  CTState *cts = ctype_cts(L);
+  TValue *o = lj_lib_checkany(L, 1);
+  const char *p;
+  size_t len;
+  if (o+1 < L->top && !tvisnil(o+1)) {
+    len = (size_t)ffi_checkint(L, 2);
+    lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, o,
+		   CCF_ARG(1));
+  } else {
+    lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CCHAR), (uint8_t *)&p, o,
+		   CCF_ARG(1));
+    len = strlen(p);
+  }
+  L->top = o+1;  /* Make sure this is the last item on the stack. */
+  setstrV(L, o, lj_str_new(L, p, len));
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(ffi_copy)	LJLIB_REC(.)
+{
+  void *dp = ffi_checkptr(L, 1, CTID_P_VOID);
+  void *sp = ffi_checkptr(L, 2, CTID_P_CVOID);
+  TValue *o = L->base+1;
+  CTSize len;
+  if (tvisstr(o) && o+1 >= L->top)
+    len = strV(o)->len+1;  /* Copy Lua string including trailing '\0'. */
+  else
+    len = (CTSize)ffi_checkint(L, 3);
+  memcpy(dp, sp, len);
+  return 0;
+}
+
+LJLIB_CF(ffi_fill)	LJLIB_REC(.)
+{
+  void *dp = ffi_checkptr(L, 1, CTID_P_VOID);
+  CTSize len = (CTSize)ffi_checkint(L, 2);
+  int32_t fill = 0;
+  if (L->base+2 < L->top && !tvisnil(L->base+2)) fill = ffi_checkint(L, 3);
+  memset(dp, fill, len);
+  return 0;
+}
+
+#define H_(le, be)	LJ_ENDIAN_SELECT(0x##le, 0x##be)
+
+/* Test ABI string. */
+LJLIB_CF(ffi_abi)	LJLIB_REC(.)
+{
+  GCstr *s = lj_lib_checkstr(L, 1);
+  int b = 0;
+  switch (s->hash) {
+#if LJ_64
+  case H_(849858eb,ad35fd06): b = 1; break;  /* 64bit */
+#else
+  case H_(662d3c79,d0e22477): b = 1; break;  /* 32bit */
+#endif
+#if LJ_ARCH_HASFPU
+  case H_(e33ee463,e33ee463): b = 1; break;  /* fpu */
+#endif
+#if LJ_ABI_SOFTFP
+  case H_(61211a23,c2e8c81c): b = 1; break;  /* softfp */
+#else
+  case H_(539417a8,8ce0812f): b = 1; break;  /* hardfp */
+#endif
+#if LJ_ABI_EABI
+  case H_(2182df8f,f2ed1152): b = 1; break;  /* eabi */
+#endif
+#if LJ_ABI_WIN
+  case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
+#endif
+  case H_(3af93066,1f001464): b = 1; break;  /* le/be */
+  default:
+    break;
+  }
+  setboolV(L->top-1, b);
+  setboolV(&G(L)->tmptv2, b);  /* Remember for trace recorder. */
+  return 1;
+}
+
+#undef H_
+
+LJLIB_PUSH(top-8) LJLIB_SET(!)  /* Store reference to miscmap table. */
+
+LJLIB_CF(ffi_metatype)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = ffi_checkctype(L, cts, NULL);
+  GCtab *mt = lj_lib_checktab(L, 2);
+  GCtab *t = cts->miscmap;
+  CType *ct = ctype_get(cts, id);  /* Only allow raw types. */
+  TValue *tv;
+  GCcdata *cd;
+  if (!(ctype_isstruct(ct->info) || ctype_iscomplex(ct->info) ||
+	ctype_isvector(ct->info)))
+    lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
+  tv = lj_tab_setinth(L, t, -(int32_t)id);
+  if (!tvisnil(tv))
+    lj_err_caller(L, LJ_ERR_PROTMT);
+  settabV(L, tv, mt);
+  lj_gc_anybarriert(L, t);
+  cd = lj_cdata_new(cts, CTID_CTYPEID, 4);
+  *(CTypeID *)cdataptr(cd) = id;
+  setcdataV(L, L->top-1, cd);
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_PUSH(top-7) LJLIB_SET(!)  /* Store reference to finalizer table. */
+
+LJLIB_CF(ffi_gc)	LJLIB_REC(.)
+{
+  GCcdata *cd = ffi_checkcdata(L, 1);
+  TValue *fin = lj_lib_checkany(L, 2);
+  CTState *cts = ctype_cts(L);
+  GCtab *t = cts->finalizer;
+  CType *ct = ctype_raw(cts, cd->ctypeid);
+  if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
+	ctype_isrefarray(ct->info)))
+    lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
+  if (gcref(t->metatable)) {  /* Update finalizer table, if still enabled. */
+    copyTV(L, lj_tab_set(L, t, L->base), fin);
+    lj_gc_anybarriert(L, t);
+    if (!tvisnil(fin))
+      cd->marked |= LJ_GC_CDATA_FIN;
+    else
+      cd->marked &= ~LJ_GC_CDATA_FIN;
+  }
+  L->top = L->base+1;  /* Pass through the cdata object. */
+  return 1;
+}
+
+LJLIB_PUSH(top-5) LJLIB_SET(!)  /* Store clib metatable in func environment. */
+
+LJLIB_CF(ffi_load)
+{
+  GCstr *name = lj_lib_checkstr(L, 1);
+  int global = (L->base+1 < L->top && tvistruecond(L->base+1));
+  lj_clib_load(L, tabref(curr_func(L)->c.env), name, global);
+  return 1;
+}
+
+LJLIB_PUSH(top-4) LJLIB_SET(C)
+LJLIB_PUSH(top-3) LJLIB_SET(os)
+LJLIB_PUSH(top-2) LJLIB_SET(arch)
+
+#include "lj_libdef.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Create special weak-keyed finalizer table. */
+static GCtab *ffi_finalizer(lua_State *L)
+{
+  /* NOBARRIER: The table is new (marked white). */
+  GCtab *t = lj_tab_new(L, 0, 1);
+  settabV(L, L->top++, t);
+  setgcref(t->metatable, obj2gco(t));
+  setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
+	  lj_str_newlit(L, "K"));
+  t->nomm = (uint8_t)(~(1u<<MM_mode));
+  return t;
+}
+
+/* Register FFI module as loaded. */
+static void ffi_register_module(lua_State *L)
+{
+  cTValue *tmp = lj_tab_getstr(tabV(registry(L)), lj_str_newlit(L, "_LOADED"));
+  if (tmp && tvistab(tmp)) {
+    GCtab *t = tabV(tmp);
+    copyTV(L, lj_tab_setstr(L, t, lj_str_newlit(L, LUA_FFILIBNAME)), L->top-1);
+    lj_gc_anybarriert(L, t);
+  }
+}
+
+LUALIB_API int luaopen_ffi(lua_State *L)
+{
+  CTState *cts = lj_ctype_init(L);
+  settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
+  cts->finalizer = ffi_finalizer(L);
+  LJ_LIB_REG(L, NULL, ffi_meta);
+  /* NOBARRIER: basemt is a GC root. */
+  setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));
+  LJ_LIB_REG(L, NULL, ffi_clib);
+  LJ_LIB_REG(L, NULL, ffi_callback);
+  /* NOBARRIER: the key is new and lj_tab_newkey() handles the barrier. */
+  settabV(L, lj_tab_setstr(L, cts->miscmap, &cts->g->strempty), tabV(L->top-1));
+  L->top--;
+  lj_clib_default(L, tabV(L->top-1));  /* Create ffi.C default namespace. */
+  lua_pushliteral(L, LJ_OS_NAME);
+  lua_pushliteral(L, LJ_ARCH_NAME);
+  LJ_LIB_REG(L, NULL, ffi);  /* Note: no global "ffi" created! */
+  ffi_register_module(L);
+  return 1;
+}
+
+#endif

+ 55 - 0
luajit.mod/luajit/src/lib_init.c

@@ -0,0 +1,55 @@
+/*
+** Library initialization.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major parts taken verbatim from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_init_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_arch.h"
+
+static const luaL_Reg lj_lib_load[] = {
+  { "",			luaopen_base },
+  { LUA_LOADLIBNAME,	luaopen_package },
+  { LUA_TABLIBNAME,	luaopen_table },
+  { LUA_IOLIBNAME,	luaopen_io },
+  { LUA_OSLIBNAME,	luaopen_os },
+  { LUA_STRLIBNAME,	luaopen_string },
+  { LUA_MATHLIBNAME,	luaopen_math },
+  { LUA_DBLIBNAME,	luaopen_debug },
+  { LUA_BITLIBNAME,	luaopen_bit },
+  { LUA_JITLIBNAME,	luaopen_jit },
+  { NULL,		NULL }
+};
+
+static const luaL_Reg lj_lib_preload[] = {
+#if LJ_HASFFI
+  { LUA_FFILIBNAME,	luaopen_ffi },
+#endif
+  { NULL,		NULL }
+};
+
+LUALIB_API void luaL_openlibs(lua_State *L)
+{
+  const luaL_Reg *lib;
+  for (lib = lj_lib_load; lib->func; lib++) {
+    lua_pushcfunction(L, lib->func);
+    lua_pushstring(L, lib->name);
+    lua_call(L, 1, 0);
+  }
+  luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD",
+		 sizeof(lj_lib_preload)/sizeof(lj_lib_preload[0])-1);
+  for (lib = lj_lib_preload; lib->func; lib++) {
+    lua_pushcfunction(L, lib->func);
+    lua_setfield(L, -2, lib->name);
+  }
+  lua_pop(L, 1);
+}
+

+ 539 - 0
luajit.mod/luajit/src/lib_io.c

@@ -0,0 +1,539 @@
+/*
+** I/O library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <errno.h>
+#include <stdio.h>
+
+#define lib_io_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_state.h"
+#include "lj_ff.h"
+#include "lj_lib.h"
+
+/* Userdata payload for I/O file. */
+typedef struct IOFileUD {
+  FILE *fp;		/* File handle. */
+  uint32_t type;	/* File type. */
+} IOFileUD;
+
+#define IOFILE_TYPE_FILE	0	/* Regular file. */
+#define IOFILE_TYPE_PIPE	1	/* Pipe. */
+#define IOFILE_TYPE_STDF	2	/* Standard file handle. */
+#define IOFILE_TYPE_MASK	3
+
+#define IOFILE_FLAG_CLOSE	4	/* Close after io.lines() iterator. */
+
+#define IOSTDF_UD(L, id)	(&gcref(G(L)->gcroot[(id)])->ud)
+#define IOSTDF_IOF(L, id)	((IOFileUD *)uddata(IOSTDF_UD(L, (id))))
+
+/* -- Open/close helpers -------------------------------------------------- */
+
+static IOFileUD *io_tofilep(lua_State *L)
+{
+  if (!(L->base < L->top && tvisudata(L->base) &&
+	udataV(L->base)->udtype == UDTYPE_IO_FILE))
+    lj_err_argtype(L, 1, "FILE*");
+  return (IOFileUD *)uddata(udataV(L->base));
+}
+
+static IOFileUD *io_tofile(lua_State *L)
+{
+  IOFileUD *iof = io_tofilep(L);
+  if (iof->fp == NULL)
+    lj_err_caller(L, LJ_ERR_IOCLFL);
+  return iof;
+}
+
+static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
+{
+  IOFileUD *iof = IOSTDF_IOF(L, id);
+  if (iof->fp == NULL)
+    lj_err_caller(L, LJ_ERR_IOSTDCL);
+  return iof->fp;
+}
+
+static IOFileUD *io_file_new(lua_State *L)
+{
+  IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
+  GCudata *ud = udataV(L->top-1);
+  ud->udtype = UDTYPE_IO_FILE;
+  /* NOBARRIER: The GCudata is new (marked white). */
+  setgcrefr(ud->metatable, curr_func(L)->c.env);
+  iof->fp = NULL;
+  iof->type = IOFILE_TYPE_FILE;
+  return iof;
+}
+
+static IOFileUD *io_file_open(lua_State *L, const char *mode)
+{
+  const char *fname = strdata(lj_lib_checkstr(L, 1));
+  IOFileUD *iof = io_file_new(L);
+  iof->fp = fopen(fname, mode);
+  if (iof->fp == NULL)
+    luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+  return iof;
+}
+
+static int io_file_close(lua_State *L, IOFileUD *iof)
+{
+  int ok;
+  if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) {
+    ok = (fclose(iof->fp) == 0);
+  } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) {
+    int stat = -1;
+#if LJ_TARGET_POSIX
+    stat = pclose(iof->fp);
+#elif LJ_TARGET_WINDOWS
+    stat = _pclose(iof->fp);
+#else
+    lua_assert(0);
+    return 0;
+#endif
+#if LJ_52
+    iof->fp = NULL;
+    return luaL_execresult(L, stat);
+#else
+    ok = (stat != -1);
+#endif
+  } else {
+    lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
+    setnilV(L->top++);
+    lua_pushliteral(L, "cannot close standard file");
+    return 2;
+  }
+  iof->fp = NULL;
+  return luaL_fileresult(L, ok, NULL);
+}
+
+/* -- Read/write helpers -------------------------------------------------- */
+
+static int io_file_readnum(lua_State *L, FILE *fp)
+{
+  lua_Number d;
+  if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
+    if (LJ_DUALNUM) {
+      int32_t i = lj_num2int(d);
+      if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) {
+	setintV(L->top++, i);
+	return 1;
+      }
+    }
+    setnumV(L->top++, d);
+    return 1;
+  } else {
+    setnilV(L->top++);
+    return 0;
+  }
+}
+
+static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
+{
+  MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
+  char *buf;
+  for (;;) {
+    buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    if (fgets(buf+n, m-n, fp) == NULL) break;
+    n += (MSize)strlen(buf+n);
+    ok |= n;
+    if (n && buf[n-1] == '\n') { n -= chop; break; }
+    if (n >= m - 64) m += m;
+  }
+  setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+  lj_gc_check(L);
+  return (int)ok;
+}
+
+static void io_file_readall(lua_State *L, FILE *fp)
+{
+  MSize m, n;
+  for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
+    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    n += (MSize)fread(buf+n, 1, m-n, fp);
+    if (n != m) {
+      setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+      lj_gc_check(L);
+      return;
+    }
+  }
+}
+
+static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
+{
+  if (m) {
+    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    MSize n = (MSize)fread(buf, 1, m, fp);
+    setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+    lj_gc_check(L);
+    return (n > 0 || m == 0);
+  } else {
+    int c = getc(fp);
+    ungetc(c, fp);
+    setstrV(L, L->top++, &G(L)->strempty);
+    return (c != EOF);
+  }
+}
+
+static int io_file_read(lua_State *L, FILE *fp, int start)
+{
+  int ok, n, nargs = (int)(L->top - L->base) - start;
+  clearerr(fp);
+  if (nargs == 0) {
+    ok = io_file_readline(L, fp, 1);
+    n = start+1;  /* Return 1 result. */
+  } else {
+    /* The results plus the buffers go on top of the args. */
+    luaL_checkstack(L, nargs+LUA_MINSTACK, "too many arguments");
+    ok = 1;
+    for (n = start; nargs-- && ok; n++) {
+      if (tvisstr(L->base+n)) {
+	const char *p = strVdata(L->base+n);
+	if (p[0] != '*')
+	  lj_err_arg(L, n+1, LJ_ERR_INVOPT);
+	if (p[1] == 'n')
+	  ok = io_file_readnum(L, fp);
+	else if ((p[1] & ~0x20) == 'L')
+	  ok = io_file_readline(L, fp, (p[1] == 'l'));
+	else if (p[1] == 'a')
+	  io_file_readall(L, fp);
+	else
+	  lj_err_arg(L, n+1, LJ_ERR_INVFMT);
+      } else if (tvisnumber(L->base+n)) {
+	ok = io_file_readlen(L, fp, (MSize)lj_lib_checkint(L, n+1));
+      } else {
+	lj_err_arg(L, n+1, LJ_ERR_INVOPT);
+      }
+    }
+  }
+  if (ferror(fp))
+    return luaL_fileresult(L, 0, NULL);
+  if (!ok)
+    setnilV(L->top-1);  /* Replace last result with nil. */
+  return n - start;
+}
+
+static int io_file_write(lua_State *L, FILE *fp, int start)
+{
+  cTValue *tv;
+  int status = 1;
+  for (tv = L->base+start; tv < L->top; tv++) {
+    if (tvisstr(tv)) {
+      MSize len = strV(tv)->len;
+      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
+    } else if (tvisint(tv)) {
+      char buf[LJ_STR_INTBUF];
+      char *p = lj_str_bufint(buf, intV(tv));
+      size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
+      status = status && (fwrite(p, 1, len, fp) == len);
+    } else if (tvisnum(tv)) {
+      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
+    } else {
+      lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
+    }
+  }
+  if (LJ_52 && status) {
+    L->top = L->base+1;
+    if (start == 0)
+      setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_OUTPUT));
+    return 1;
+  }
+  return luaL_fileresult(L, status, NULL);
+}
+
+static int io_file_iter(lua_State *L)
+{
+  GCfunc *fn = curr_func(L);
+  IOFileUD *iof = uddata(udataV(&fn->c.upvalue[0]));
+  int n = fn->c.nupvalues - 1;
+  if (iof->fp == NULL)
+    lj_err_caller(L, LJ_ERR_IOCLFL);
+  L->top = L->base;
+  if (n) {  /* Copy upvalues with options to stack. */
+    if (n > LUAI_MAXCSTACK)
+      lj_err_caller(L, LJ_ERR_STKOV);
+    lj_state_checkstack(L, (MSize)n);
+    memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue));
+    L->top += n;
+  }
+  n = io_file_read(L, iof->fp, 0);
+  if (ferror(iof->fp))
+    lj_err_callermsg(L, strVdata(L->top-2));
+  if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) {
+    io_file_close(L, iof);  /* Return values are ignored. */
+    return 0;
+  }
+  return n;
+}
+
+/* -- I/O file methods ---------------------------------------------------- */
+
+#define LJLIB_MODULE_io_method
+
+LJLIB_CF(io_method_close)
+{
+  IOFileUD *iof = L->base < L->top ? io_tofile(L) :
+		  IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+  return io_file_close(L, iof);
+}
+
+LJLIB_CF(io_method_read)
+{
+  return io_file_read(L, io_tofile(L)->fp, 1);
+}
+
+LJLIB_CF(io_method_write)		LJLIB_REC(io_write 0)
+{
+  return io_file_write(L, io_tofile(L)->fp, 1);
+}
+
+LJLIB_CF(io_method_flush)		LJLIB_REC(io_flush 0)
+{
+  return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
+}
+
+LJLIB_CF(io_method_seek)
+{
+  FILE *fp = io_tofile(L)->fp;
+  int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
+  int64_t ofs = 0;
+  cTValue *o;
+  int res;
+  if (opt == 0) opt = SEEK_SET;
+  else if (opt == 1) opt = SEEK_CUR;
+  else if (opt == 2) opt = SEEK_END;
+  o = L->base+2;
+  if (o < L->top) {
+    if (tvisint(o))
+      ofs = (int64_t)intV(o);
+    else if (tvisnum(o))
+      ofs = (int64_t)numV(o);
+    else if (!tvisnil(o))
+      lj_err_argt(L, 3, LUA_TNUMBER);
+  }
+#if LJ_TARGET_POSIX
+  res = fseeko(fp, ofs, opt);
+#elif _MSC_VER >= 1400
+  res = _fseeki64(fp, ofs, opt);
+#elif defined(__MINGW32__)
+  res = fseeko64(fp, ofs, opt);
+#else
+  res = fseek(fp, (long)ofs, opt);
+#endif
+  if (res)
+    return luaL_fileresult(L, 0, NULL);
+#if LJ_TARGET_POSIX
+  ofs = ftello(fp);
+#elif _MSC_VER >= 1400
+  ofs = _ftelli64(fp);
+#elif defined(__MINGW32__)
+  ofs = ftello64(fp);
+#else
+  ofs = (int64_t)ftell(fp);
+#endif
+  setint64V(L->top-1, ofs);
+  return 1;
+}
+
+LJLIB_CF(io_method_setvbuf)
+{
+  FILE *fp = io_tofile(L)->fp;
+  int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
+  size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
+  if (opt == 0) opt = _IOFBF;
+  else if (opt == 1) opt = _IOLBF;
+  else if (opt == 2) opt = _IONBF;
+  return luaL_fileresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL);
+}
+
+LJLIB_CF(io_method_lines)
+{
+  io_tofile(L);
+  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
+  return 1;
+}
+
+LJLIB_CF(io_method___gc)
+{
+  IOFileUD *iof = io_tofilep(L);
+  if (iof->fp != NULL && (iof->type & IOFILE_TYPE_MASK) != IOFILE_TYPE_STDF)
+    io_file_close(L, iof);
+  return 0;
+}
+
+LJLIB_CF(io_method___tostring)
+{
+  IOFileUD *iof = io_tofilep(L);
+  if (iof->fp != NULL)
+    lua_pushfstring(L, "file (%p)", iof->fp);
+  else
+    lua_pushliteral(L, "file (closed)");
+  return 1;
+}
+
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+#include "lj_libdef.h"
+
+/* -- I/O library functions ----------------------------------------------- */
+
+#define LJLIB_MODULE_io
+
+LJLIB_PUSH(top-2) LJLIB_SET(!)  /* Set environment. */
+
+LJLIB_CF(io_open)
+{
+  const char *fname = strdata(lj_lib_checkstr(L, 1));
+  GCstr *s = lj_lib_optstr(L, 2);
+  const char *mode = s ? strdata(s) : "r";
+  IOFileUD *iof = io_file_new(L);
+  iof->fp = fopen(fname, mode);
+  return iof->fp != NULL ? 1 : luaL_fileresult(L, 0, fname);
+}
+
+LJLIB_CF(io_popen)
+{
+#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+  const char *fname = strdata(lj_lib_checkstr(L, 1));
+  GCstr *s = lj_lib_optstr(L, 2);
+  const char *mode = s ? strdata(s) : "r";
+  IOFileUD *iof = io_file_new(L);
+  iof->type = IOFILE_TYPE_PIPE;
+#if LJ_TARGET_POSIX
+  fflush(NULL);
+  iof->fp = popen(fname, mode);
+#else
+  iof->fp = _popen(fname, mode);
+#endif
+  return iof->fp != NULL ? 1 : luaL_fileresult(L, 0, fname);
+#else
+  return luaL_error(L, LUA_QL("popen") " not supported");
+#endif
+}
+
+LJLIB_CF(io_tmpfile)
+{
+  IOFileUD *iof = io_file_new(L);
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+  iof->fp = NULL; errno = ENOSYS;
+#else
+  iof->fp = tmpfile();
+#endif
+  return iof->fp != NULL ? 1 : luaL_fileresult(L, 0, NULL);
+}
+
+LJLIB_CF(io_close)
+{
+  return lj_cf_io_method_close(L);
+}
+
+LJLIB_CF(io_read)
+{
+  return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0);
+}
+
+LJLIB_CF(io_write)		LJLIB_REC(io_write GCROOT_IO_OUTPUT)
+{
+  return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0);
+}
+
+LJLIB_CF(io_flush)		LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
+{
+  return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
+}
+
+static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
+{
+  if (L->base < L->top && !tvisnil(L->base)) {
+    if (tvisudata(L->base)) {
+      io_tofile(L);
+      L->top = L->base+1;
+    } else {
+      io_file_open(L, mode);
+    }
+    /* NOBARRIER: The standard I/O handles are GC roots. */
+    setgcref(G(L)->gcroot[id], gcV(L->top-1));
+  } else {
+    setudataV(L, L->top++, IOSTDF_UD(L, id));
+  }
+  return 1;
+}
+
+LJLIB_CF(io_input)
+{
+  return io_std_getset(L, GCROOT_IO_INPUT, "r");
+}
+
+LJLIB_CF(io_output)
+{
+  return io_std_getset(L, GCROOT_IO_OUTPUT, "w");
+}
+
+LJLIB_CF(io_lines)
+{
+  if (L->base == L->top) setnilV(L->top++);
+  if (!tvisnil(L->base)) {  /* io.lines(fname) */
+    IOFileUD *iof = io_file_open(L, "r");
+    iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE;
+    L->top--;
+    setudataV(L, L->base, udataV(L->top));
+  } else {  /* io.lines() iterates over stdin. */
+    setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
+  }
+  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
+  return 1;
+}
+
+LJLIB_CF(io_type)
+{
+  cTValue *o = lj_lib_checkany(L, 1);
+  if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE))
+    setnilV(L->top++);
+  else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL)
+    lua_pushliteral(L, "file");
+  else
+    lua_pushliteral(L, "closed file");
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+/* ------------------------------------------------------------------------ */
+
+static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name)
+{
+  IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
+  GCudata *ud = udataV(L->top-1);
+  ud->udtype = UDTYPE_IO_FILE;
+  /* NOBARRIER: The GCudata is new (marked white). */
+  setgcref(ud->metatable, gcV(L->top-3));
+  iof->fp = fp;
+  iof->type = IOFILE_TYPE_STDF;
+  lua_setfield(L, -2, name);
+  return obj2gco(ud);
+}
+
+LUALIB_API int luaopen_io(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, io_method);
+  copyTV(L, L->top, L->top-1); L->top++;
+  lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
+  LJ_LIB_REG(L, LUA_IOLIBNAME, io);
+  setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin"));
+  setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout"));
+  io_std_new(L, stderr, "stderr");
+  return 1;
+}
+

+ 663 - 0
luajit.mod/luajit/src/lib_jit.c

@@ -0,0 +1,663 @@
+/*
+** JIT library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_jit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_arch.h"
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_debug.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_bc.h"
+#if LJ_HASJIT
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_ircall.h"
+#include "lj_iropt.h"
+#include "lj_target.h"
+#endif
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_vmevent.h"
+#include "lj_lib.h"
+
+#include "luajit.h"
+
+/* -- jit.* functions ----------------------------------------------------- */
+
+#define LJLIB_MODULE_jit
+
+static int setjitmode(lua_State *L, int mode)
+{
+  int idx = 0;
+  if (L->base == L->top || tvisnil(L->base)) {  /* jit.on/off/flush([nil]) */
+    mode |= LUAJIT_MODE_ENGINE;
+  } else {
+    /* jit.on/off/flush(func|proto, nil|true|false) */
+    if (tvisfunc(L->base) || tvisproto(L->base))
+      idx = 1;
+    else if (!tvistrue(L->base))  /* jit.on/off/flush(true, nil|true|false) */
+      goto err;
+    if (L->base+1 < L->top && tvisbool(L->base+1))
+      mode |= boolV(L->base+1) ? LUAJIT_MODE_ALLFUNC : LUAJIT_MODE_ALLSUBFUNC;
+    else
+      mode |= LUAJIT_MODE_FUNC;
+  }
+  if (luaJIT_setmode(L, idx, mode) != 1) {
+    if ((mode & LUAJIT_MODE_MASK) == LUAJIT_MODE_ENGINE)
+      lj_err_caller(L, LJ_ERR_NOJIT);
+  err:
+    lj_err_argt(L, 1, LUA_TFUNCTION);
+  }
+  return 0;
+}
+
+LJLIB_CF(jit_on)
+{
+  return setjitmode(L, LUAJIT_MODE_ON);
+}
+
+LJLIB_CF(jit_off)
+{
+  return setjitmode(L, LUAJIT_MODE_OFF);
+}
+
+LJLIB_CF(jit_flush)
+{
+#if LJ_HASJIT
+  if (L->base < L->top && tvisnumber(L->base)) {
+    int traceno = lj_lib_checkint(L, 1);
+    luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
+    return 0;
+  }
+#endif
+  return setjitmode(L, LUAJIT_MODE_FLUSH);
+}
+
+#if LJ_HASJIT
+/* Push a string for every flag bit that is set. */
+static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base,
+				const char *str)
+{
+  for (; *str; base <<= 1, str += 1+*str)
+    if (flags & base)
+      setstrV(L, L->top++, lj_str_new(L, str+1, *(uint8_t *)str));
+}
+#endif
+
+LJLIB_CF(jit_status)
+{
+#if LJ_HASJIT
+  jit_State *J = L2J(L);
+  L->top = L->base;
+  setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
+  flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
+  flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
+  return (int)(L->top - L->base);
+#else
+  setboolV(L->top++, 0);
+  return 1;
+#endif
+}
+
+LJLIB_CF(jit_attach)
+{
+#ifdef LUAJIT_DISABLE_VMEVENT
+  luaL_error(L, "vmevent API disabled");
+#else
+  GCfunc *fn = lj_lib_checkfunc(L, 1);
+  GCstr *s = lj_lib_optstr(L, 2);
+  luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
+  if (s) {  /* Attach to given event. */
+    const uint8_t *p = (const uint8_t *)strdata(s);
+    uint32_t h = s->len;
+    while (*p) h = h ^ (lj_rol(h, 6) + *p++);
+    lua_pushvalue(L, 1);
+    lua_rawseti(L, -2, VMEVENT_HASHIDX(h));
+    G(L)->vmevmask = VMEVENT_NOCACHE;  /* Invalidate cache. */
+  } else {  /* Detach if no event given. */
+    setnilV(L->top++);
+    while (lua_next(L, -2)) {
+      L->top--;
+      if (tvisfunc(L->top) && funcV(L->top) == fn) {
+	setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1));
+      }
+    }
+  }
+#endif
+  return 0;
+}
+
+LJLIB_PUSH(top-5) LJLIB_SET(os)
+LJLIB_PUSH(top-4) LJLIB_SET(arch)
+LJLIB_PUSH(top-3) LJLIB_SET(version_num)
+LJLIB_PUSH(top-2) LJLIB_SET(version)
+
+#include "lj_libdef.h"
+
+/* -- jit.util.* functions ------------------------------------------------ */
+
+#define LJLIB_MODULE_jit_util
+
+/* -- Reflection API for Lua functions ------------------------------------ */
+
+/* Return prototype of first argument (Lua function or prototype object) */
+static GCproto *check_Lproto(lua_State *L, int nolua)
+{
+  TValue *o = L->base;
+  if (L->top > o) {
+    if (tvisproto(o)) {
+      return protoV(o);
+    } else if (tvisfunc(o)) {
+      if (isluafunc(funcV(o)))
+	return funcproto(funcV(o));
+      else if (nolua)
+	return NULL;
+    }
+  }
+  lj_err_argt(L, 1, LUA_TFUNCTION);
+  return NULL;  /* unreachable */
+}
+
+static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
+{
+  setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
+}
+
+/* local info = jit.util.funcinfo(func [,pc]) */
+LJLIB_CF(jit_util_funcinfo)
+{
+  GCproto *pt = check_Lproto(L, 1);
+  if (pt) {
+    BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
+    GCtab *t;
+    lua_createtable(L, 0, 16);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintfield(L, t, "linedefined", pt->firstline);
+    setintfield(L, t, "lastlinedefined", pt->firstline + pt->numline);
+    setintfield(L, t, "stackslots", pt->framesize);
+    setintfield(L, t, "params", pt->numparams);
+    setintfield(L, t, "bytecodes", (int32_t)pt->sizebc);
+    setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc);
+    setintfield(L, t, "nconsts", (int32_t)pt->sizekn);
+    setintfield(L, t, "upvalues", (int32_t)pt->sizeuv);
+    if (pc < pt->sizebc)
+      setintfield(L, t, "currentline", lj_debug_line(pt, pc));
+    lua_pushboolean(L, (pt->flags & PROTO_VARARG));
+    lua_setfield(L, -2, "isvararg");
+    lua_pushboolean(L, (pt->flags & PROTO_CHILD));
+    lua_setfield(L, -2, "children");
+    setstrV(L, L->top++, proto_chunkname(pt));
+    lua_setfield(L, -2, "source");
+    lj_debug_pushloc(L, pt, pc);
+    lua_setfield(L, -2, "loc");
+  } else {
+    GCfunc *fn = funcV(L->base);
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    if (!iscfunc(fn))
+      setintfield(L, t, "ffid", fn->c.ffid);
+    setintptrV(lj_tab_setstr(L, t, lj_str_newlit(L, "addr")),
+	       (intptr_t)(void *)fn->c.f);
+    setintfield(L, t, "upvalues", fn->c.nupvalues);
+  }
+  return 1;
+}
+
+/* local ins, m = jit.util.funcbc(func, pc) */
+LJLIB_CF(jit_util_funcbc)
+{
+  GCproto *pt = check_Lproto(L, 0);
+  BCPos pc = (BCPos)lj_lib_checkint(L, 2);
+  if (pc < pt->sizebc) {
+    BCIns ins = proto_bc(pt)[pc];
+    BCOp op = bc_op(ins);
+    lua_assert(op < BC__MAX);
+    setintV(L->top, ins);
+    setintV(L->top+1, lj_bc_mode[op]);
+    L->top += 2;
+    return 2;
+  }
+  return 0;
+}
+
+/* local k = jit.util.funck(func, idx) */
+LJLIB_CF(jit_util_funck)
+{
+  GCproto *pt = check_Lproto(L, 0);
+  ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
+  if (idx >= 0) {
+    if (idx < (ptrdiff_t)pt->sizekn) {
+      copyTV(L, L->top-1, proto_knumtv(pt, idx));
+      return 1;
+    }
+  } else {
+    if (~idx < (ptrdiff_t)pt->sizekgc) {
+      GCobj *gc = proto_kgc(pt, idx);
+      setgcV(L, L->top-1, gc, ~gc->gch.gct);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* local name = jit.util.funcuvname(func, idx) */
+LJLIB_CF(jit_util_funcuvname)
+{
+  GCproto *pt = check_Lproto(L, 0);
+  uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
+  if (idx < pt->sizeuv) {
+    setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));
+    return 1;
+  }
+  return 0;
+}
+
+/* -- Reflection API for traces ------------------------------------------- */
+
+#if LJ_HASJIT
+
+/* Check trace argument. Must not throw for non-existent trace numbers. */
+static GCtrace *jit_checktrace(lua_State *L)
+{
+  TraceNo tr = (TraceNo)lj_lib_checkint(L, 1);
+  jit_State *J = L2J(L);
+  if (tr > 0 && tr < J->sizetrace)
+    return traceref(J, tr);
+  return NULL;
+}
+
+/* Names of link types. ORDER LJ_TRLINK */
+static const char *const jit_trlinkname[] = {
+  "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
+  "interpreter", "return"
+};
+
+/* local info = jit.util.traceinfo(tr) */
+LJLIB_CF(jit_util_traceinfo)
+{
+  GCtrace *T = jit_checktrace(L);
+  if (T) {
+    GCtab *t;
+    lua_createtable(L, 0, 8);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintfield(L, t, "nins", (int32_t)T->nins - REF_BIAS - 1);
+    setintfield(L, t, "nk", REF_BIAS - (int32_t)T->nk);
+    setintfield(L, t, "link", T->link);
+    setintfield(L, t, "nexit", T->nsnap);
+    setstrV(L, L->top++, lj_str_newz(L, jit_trlinkname[T->linktype]));
+    lua_setfield(L, -2, "linktype");
+    /* There are many more fields. Add them only when needed. */
+    return 1;
+  }
+  return 0;
+}
+
+/* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx) */
+LJLIB_CF(jit_util_traceir)
+{
+  GCtrace *T = jit_checktrace(L);
+  IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
+  if (T && ref >= REF_BIAS && ref < T->nins) {
+    IRIns *ir = &T->ir[ref];
+    int32_t m = lj_ir_mode[ir->o];
+    setintV(L->top-2, m);
+    setintV(L->top-1, ir->ot);
+    setintV(L->top++, (int32_t)ir->op1 - (irm_op1(m)==IRMref ? REF_BIAS : 0));
+    setintV(L->top++, (int32_t)ir->op2 - (irm_op2(m)==IRMref ? REF_BIAS : 0));
+    setintV(L->top++, ir->prev);
+    return 5;
+  }
+  return 0;
+}
+
+/* local k, t [, slot] = jit.util.tracek(tr, idx) */
+LJLIB_CF(jit_util_tracek)
+{
+  GCtrace *T = jit_checktrace(L);
+  IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
+  if (T && ref >= T->nk && ref < REF_BIAS) {
+    IRIns *ir = &T->ir[ref];
+    int32_t slot = -1;
+    if (ir->o == IR_KSLOT) {
+      slot = ir->op2;
+      ir = &T->ir[ir->op1];
+    }
+    lj_ir_kvalue(L, L->top-2, ir);
+    setintV(L->top-1, (int32_t)irt_type(ir->t));
+    if (slot == -1)
+      return 2;
+    setintV(L->top++, slot);
+    return 3;
+  }
+  return 0;
+}
+
+/* local snap = jit.util.tracesnap(tr, sn) */
+LJLIB_CF(jit_util_tracesnap)
+{
+  GCtrace *T = jit_checktrace(L);
+  SnapNo sn = (SnapNo)lj_lib_checkint(L, 2);
+  if (T && sn < T->nsnap) {
+    SnapShot *snap = &T->snap[sn];
+    SnapEntry *map = &T->snapmap[snap->mapofs];
+    MSize n, nent = snap->nent;
+    GCtab *t;
+    lua_createtable(L, nent+2, 0);
+    t = tabV(L->top-1);
+    setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS);
+    setintV(lj_tab_setint(L, t, 1), (int32_t)snap->nslots);
+    for (n = 0; n < nent; n++)
+      setintV(lj_tab_setint(L, t, (int32_t)(n+2)), (int32_t)map[n]);
+    setintV(lj_tab_setint(L, t, (int32_t)(nent+2)), (int32_t)SNAP(255, 0, 0));
+    return 1;
+  }
+  return 0;
+}
+
+/* local mcode, addr, loop = jit.util.tracemc(tr) */
+LJLIB_CF(jit_util_tracemc)
+{
+  GCtrace *T = jit_checktrace(L);
+  if (T && T->mcode != NULL) {
+    setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode));
+    setintptrV(L->top++, (intptr_t)(void *)T->mcode);
+    setintV(L->top++, T->mcloop);
+    return 3;
+  }
+  return 0;
+}
+
+/* local addr = jit.util.traceexitstub([tr,] exitno) */
+LJLIB_CF(jit_util_traceexitstub)
+{
+#ifdef EXITSTUBS_PER_GROUP
+  ExitNo exitno = (ExitNo)lj_lib_checkint(L, 1);
+  jit_State *J = L2J(L);
+  if (exitno < EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) {
+    setintptrV(L->top-1, (intptr_t)(void *)exitstub_addr(J, exitno));
+    return 1;
+  }
+#else
+  if (L->top > L->base+1) {  /* Don't throw for one-argument variant. */
+    GCtrace *T = jit_checktrace(L);
+    ExitNo exitno = (ExitNo)lj_lib_checkint(L, 2);
+    ExitNo maxexit = T->root ? T->nsnap+1 : T->nsnap;
+    if (T && T->mcode != NULL && exitno < maxexit) {
+      setintptrV(L->top-1, (intptr_t)(void *)exitstub_trace_addr(T, exitno));
+      return 1;
+    }
+  }
+#endif
+  return 0;
+}
+
+/* local addr = jit.util.ircalladdr(idx) */
+LJLIB_CF(jit_util_ircalladdr)
+{
+  uint32_t idx = (uint32_t)lj_lib_checkint(L, 1);
+  if (idx < IRCALL__MAX) {
+    setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func);
+    return 1;
+  }
+  return 0;
+}
+
+#endif
+
+#include "lj_libdef.h"
+
+/* -- jit.opt module ------------------------------------------------------ */
+
+#if LJ_HASJIT
+
+#define LJLIB_MODULE_jit_opt
+
+/* Parse optimization level. */
+static int jitopt_level(jit_State *J, const char *str)
+{
+  if (str[0] >= '0' && str[0] <= '9' && str[1] == '\0') {
+    uint32_t flags;
+    if (str[0] == '0') flags = JIT_F_OPT_0;
+    else if (str[0] == '1') flags = JIT_F_OPT_1;
+    else if (str[0] == '2') flags = JIT_F_OPT_2;
+    else flags = JIT_F_OPT_3;
+    J->flags = (J->flags & ~JIT_F_OPT_MASK) | flags;
+    return 1;  /* Ok. */
+  }
+  return 0;  /* No match. */
+}
+
+/* Parse optimization flag. */
+static int jitopt_flag(jit_State *J, const char *str)
+{
+  const char *lst = JIT_F_OPTSTRING;
+  uint32_t opt;
+  int set = 1;
+  if (str[0] == '+') {
+    str++;
+  } else if (str[0] == '-') {
+    str++;
+    set = 0;
+  } else if (str[0] == 'n' && str[1] == 'o') {
+    str += str[2] == '-' ? 3 : 2;
+    set = 0;
+  }
+  for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
+    size_t len = *(const uint8_t *)lst;
+    if (len == 0)
+      break;
+    if (strncmp(str, lst+1, len) == 0 && str[len] == '\0') {
+      if (set) J->flags |= opt; else J->flags &= ~opt;
+      return 1;  /* Ok. */
+    }
+    lst += 1+len;
+  }
+  return 0;  /* No match. */
+}
+
+/* Parse optimization parameter. */
+static int jitopt_param(jit_State *J, const char *str)
+{
+  const char *lst = JIT_P_STRING;
+  int i;
+  for (i = 0; i < JIT_P__MAX; i++) {
+    size_t len = *(const uint8_t *)lst;
+    lua_assert(len != 0);
+    if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
+      int32_t n = 0;
+      const char *p = &str[len+1];
+      while (*p >= '0' && *p <= '9')
+	n = n*10 + (*p++ - '0');
+      if (*p) return 0;  /* Malformed number. */
+      J->param[i] = n;
+      if (i == JIT_P_hotloop)
+	lj_dispatch_init_hotcount(J2G(J));
+      return 1;  /* Ok. */
+    }
+    lst += 1+len;
+  }
+  return 0;  /* No match. */
+}
+
+/* jit.opt.start(flags...) */
+LJLIB_CF(jit_opt_start)
+{
+  jit_State *J = L2J(L);
+  int nargs = (int)(L->top - L->base);
+  if (nargs == 0) {
+    J->flags = (J->flags & ~JIT_F_OPT_MASK) | JIT_F_OPT_DEFAULT;
+  } else {
+    int i;
+    for (i = 1; i <= nargs; i++) {
+      const char *str = strdata(lj_lib_checkstr(L, i));
+      if (!jitopt_level(J, str) &&
+	  !jitopt_flag(J, str) &&
+	  !jitopt_param(J, str))
+	lj_err_callerv(L, LJ_ERR_JITOPT, str);
+    }
+  }
+  return 0;
+}
+
+#include "lj_libdef.h"
+
+#endif
+
+/* -- JIT compiler initialization ----------------------------------------- */
+
+#if LJ_HASJIT
+/* Default values for JIT parameters. */
+static const int32_t jit_param_default[JIT_P__MAX+1] = {
+#define JIT_PARAMINIT(len, name, value)	(value),
+JIT_PARAMDEF(JIT_PARAMINIT)
+#undef JIT_PARAMINIT
+  0
+};
+#endif
+
+#if LJ_TARGET_ARM && LJ_TARGET_LINUX
+#include <sys/utsname.h>
+#endif
+
+/* Arch-dependent CPU detection. */
+static uint32_t jit_cpudetect(lua_State *L)
+{
+  uint32_t flags = 0;
+#if LJ_TARGET_X86ORX64
+  uint32_t vendor[4];
+  uint32_t features[4];
+  if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
+#if !LJ_HASJIT
+#define JIT_F_CMOV	1
+#define JIT_F_SSE2	2
+#endif
+    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
+    flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
+#if LJ_HASJIT
+    flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
+    flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
+    if (vendor[2] == 0x6c65746e) {  /* Intel. */
+      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
+	flags |= JIT_F_P4;  /* Currently unused. */
+      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+	flags |= JIT_F_LEA_AGU;
+    } else if (vendor[2] == 0x444d4163) {  /* AMD. */
+      uint32_t fam = (features[0] & 0x0ff00f00);
+      if (fam == 0x00000f00)  /* K8. */
+	flags |= JIT_F_SPLIT_XMM;
+      if (fam >= 0x00000f00)  /* K8, K10. */
+	flags |= JIT_F_PREFER_IMUL;
+    }
+#endif
+  }
+  /* Check for required instruction set support on x86 (unnecessary on x64). */
+#if LJ_TARGET_X86
+#if !defined(LUAJIT_CPU_NOCMOV)
+  if (!(flags & JIT_F_CMOV))
+    luaL_error(L, "CPU not supported");
+#endif
+#if defined(LUAJIT_CPU_SSE2)
+  if (!(flags & JIT_F_SSE2))
+    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
+#endif
+#endif
+#elif LJ_TARGET_ARM
+#if LJ_HASJIT
+  int ver = LJ_ARCH_VERSION;  /* Compile-time ARM CPU detection. */
+#if LJ_TARGET_LINUX
+  if (ver < 70) {  /* Runtime ARM CPU detection. */
+    struct utsname ut;
+    uname(&ut);
+    if (strncmp(ut.machine, "armv", 4) == 0) {
+      if (ut.machine[4] >= '7')
+	ver = 70;
+      else if (ut.machine[4] == '6')
+	ver = 60;
+    }
+  }
+#endif
+  flags |= ver >= 70 ? JIT_F_ARMV7 :
+	   ver >= 61 ? JIT_F_ARMV6T2_ :
+	   ver >= 60 ? JIT_F_ARMV6_ : 0;
+  flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
+#endif
+#elif LJ_TARGET_PPC
+#if LJ_HASJIT
+#if LJ_ARCH_SQRT
+  flags |= JIT_F_SQRT;
+#endif
+#if LJ_ARCH_ROUND
+  flags |= JIT_F_ROUND;
+#endif
+#endif
+#elif LJ_TARGET_PPCSPE
+  /* Nothing to do. */
+#elif LJ_TARGET_MIPS
+#if LJ_HASJIT
+  /* Compile-time MIPS CPU detection. */
+#if LJ_ARCH_VERSION >= 20
+  flags |= JIT_F_MIPS32R2;
+#endif
+  /* Runtime MIPS CPU detection. */
+#if defined(__GNUC__)
+  if (!(flags & JIT_F_MIPS32R2)) {
+    int x;
+    /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
+    __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
+    if (x) flags |= JIT_F_MIPS32R2;  /* Either 0x80000000 (R2) or 0 (R1). */
+  }
+#endif
+#endif
+#else
+#error "Missing CPU detection for this architecture"
+#endif
+  UNUSED(L);
+  return flags;
+}
+
+/* Initialize JIT compiler. */
+static void jit_init(lua_State *L)
+{
+  uint32_t flags = jit_cpudetect(L);
+#if LJ_HASJIT
+  jit_State *J = L2J(L);
+#if LJ_TARGET_X86
+  /* Silently turn off the JIT compiler on CPUs without SSE2. */
+  if ((flags & JIT_F_SSE2))
+#endif
+    J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  memcpy(J->param, jit_param_default, sizeof(J->param));
+  lj_dispatch_update(G(L));
+#else
+  UNUSED(flags);
+#endif
+}
+
+LUALIB_API int luaopen_jit(lua_State *L)
+{
+  lua_pushliteral(L, LJ_OS_NAME);
+  lua_pushliteral(L, LJ_ARCH_NAME);
+  lua_pushinteger(L, LUAJIT_VERSION_NUM);
+  lua_pushliteral(L, LUAJIT_VERSION);
+  LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#ifndef LUAJIT_DISABLE_JITUTIL
+  LJ_LIB_REG(L, "jit.util", jit_util);
+#endif
+#if LJ_HASJIT
+  LJ_LIB_REG(L, "jit.opt", jit_opt);
+#endif
+  L->top -= 2;
+  jit_init(L);
+  return 1;
+}
+

+ 233 - 0
luajit.mod/luajit/src/lib_math.c

@@ -0,0 +1,233 @@
+/*
+** Math library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include <math.h>
+
+#define lib_math_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_lib.h"
+#include "lj_vm.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_math
+
+LJLIB_ASM(math_abs)		LJLIB_REC(.)
+{
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_floor)		LJLIB_REC(math_round IRFPM_FLOOR)
+LJLIB_ASM_(math_ceil)		LJLIB_REC(math_round IRFPM_CEIL)
+
+LJLIB_ASM(math_sqrt)		LJLIB_REC(math_unary IRFPM_SQRT)
+{
+  lj_lib_checknum(L, 1);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_log10)		LJLIB_REC(math_unary IRFPM_LOG10)
+LJLIB_ASM_(math_exp)		LJLIB_REC(math_unary IRFPM_EXP)
+LJLIB_ASM_(math_sin)		LJLIB_REC(math_unary IRFPM_SIN)
+LJLIB_ASM_(math_cos)		LJLIB_REC(math_unary IRFPM_COS)
+LJLIB_ASM_(math_tan)		LJLIB_REC(math_unary IRFPM_TAN)
+LJLIB_ASM_(math_asin)		LJLIB_REC(math_atrig FF_math_asin)
+LJLIB_ASM_(math_acos)		LJLIB_REC(math_atrig FF_math_acos)
+LJLIB_ASM_(math_atan)		LJLIB_REC(math_atrig FF_math_atan)
+LJLIB_ASM_(math_sinh)		LJLIB_REC(math_htrig IRCALL_sinh)
+LJLIB_ASM_(math_cosh)		LJLIB_REC(math_htrig IRCALL_cosh)
+LJLIB_ASM_(math_tanh)		LJLIB_REC(math_htrig IRCALL_tanh)
+LJLIB_ASM_(math_frexp)
+LJLIB_ASM_(math_modf)		LJLIB_REC(.)
+
+LJLIB_PUSH(57.29577951308232)
+LJLIB_ASM_(math_deg)		LJLIB_REC(math_degrad)
+
+LJLIB_PUSH(0.017453292519943295)
+LJLIB_ASM_(math_rad)		LJLIB_REC(math_degrad)
+
+LJLIB_ASM(math_log)		LJLIB_REC(math_log)
+{
+  double x = lj_lib_checknum(L, 1);
+  if (L->base+1 < L->top) {
+    double y = lj_lib_checknum(L, 2);
+#ifdef LUAJIT_NO_LOG2
+    x = log(x); y = 1.0 / log(y);
+#else
+    x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
+#endif
+    setnumV(L->base-1, x*y);  /* Do NOT join the expression to x / y. */
+    return FFH_RES(1);
+  }
+  return FFH_RETRY;
+}
+
+LJLIB_ASM(math_atan2)		LJLIB_REC(.)
+{
+  lj_lib_checknum(L, 1);
+  lj_lib_checknum(L, 2);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_pow)		LJLIB_REC(.)
+LJLIB_ASM_(math_fmod)
+
+LJLIB_ASM(math_ldexp)		LJLIB_REC(.)
+{
+  lj_lib_checknum(L, 1);
+#if LJ_DUALNUM && !LJ_TARGET_X86ORX64
+  lj_lib_checkint(L, 2);
+#else
+  lj_lib_checknum(L, 2);
+#endif
+  return FFH_RETRY;
+}
+
+LJLIB_ASM(math_min)		LJLIB_REC(math_minmax IR_MIN)
+{
+  int i = 0;
+  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_max)		LJLIB_REC(math_minmax IR_MAX)
+
+LJLIB_PUSH(3.14159265358979323846) LJLIB_SET(pi)
+LJLIB_PUSH(1e310) LJLIB_SET(huge)
+
+/* ------------------------------------------------------------------------ */
+
+/* This implements a Tausworthe PRNG with period 2^223. Based on:
+**   Tables of maximally-equidistributed combined LFSR generators,
+**   Pierre L'Ecuyer, 1991, table 3, 1st entry.
+** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
+*/
+
+/* PRNG state. */
+struct RandomState {
+  uint64_t gen[4];	/* State of the 4 LFSR generators. */
+  int valid;		/* State is valid. */
+};
+
+/* Union needed for bit-pattern conversion between uint64_t and double. */
+typedef union { uint64_t u64; double d; } U64double;
+
+/* Update generator i and compute a running xor of all states. */
+#define TW223_GEN(i, k, q, s) \
+  z = rs->gen[i]; \
+  z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
+  r ^= z; rs->gen[i] = z;
+
+/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
+LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
+{
+  uint64_t z, r = 0;
+  TW223_GEN(0, 63, 31, 18)
+  TW223_GEN(1, 58, 19, 28)
+  TW223_GEN(2, 55, 24,  7)
+  TW223_GEN(3, 47, 21,  8)
+  return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
+}
+
+/* PRNG initialization function. */
+static void random_init(RandomState *rs, double d)
+{
+  uint32_t r = 0x11090601;  /* 64-k[i] as four 8 bit constants. */
+  int i;
+  for (i = 0; i < 4; i++) {
+    U64double u;
+    uint32_t m = 1u << (r&255);
+    r >>= 8;
+    u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
+    if (u.u64 < m) u.u64 += m;  /* Ensure k[i] MSB of gen[i] are non-zero. */
+    rs->gen[i] = u.u64;
+  }
+  rs->valid = 1;
+  for (i = 0; i < 10; i++)
+    lj_math_random_step(rs);
+}
+
+/* PRNG extract function. */
+LJLIB_PUSH(top-2)  /* Upvalue holds userdata with RandomState. */
+LJLIB_CF(math_random)		LJLIB_REC(.)
+{
+  int n = (int)(L->top - L->base);
+  RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+  U64double u;
+  double d;
+  if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
+  u.u64 = lj_math_random_step(rs);
+  d = u.d - 1.0;
+  if (n > 0) {
+#if LJ_DUALNUM
+    int isint = 1;
+    double r1;
+    lj_lib_checknumber(L, 1);
+    if (tvisint(L->base)) {
+      r1 = (lua_Number)intV(L->base);
+    } else {
+      isint = 0;
+      r1 = numV(L->base);
+    }
+#else
+    double r1 = lj_lib_checknum(L, 1);
+#endif
+    if (n == 1) {
+      d = lj_vm_floor(d*r1) + 1.0;  /* d is an int in range [1, r1] */
+    } else {
+#if LJ_DUALNUM
+      double r2;
+      lj_lib_checknumber(L, 2);
+      if (tvisint(L->base+1)) {
+	r2 = (lua_Number)intV(L->base+1);
+      } else {
+	isint = 0;
+	r2 = numV(L->base+1);
+      }
+#else
+      double r2 = lj_lib_checknum(L, 2);
+#endif
+      d = lj_vm_floor(d*(r2-r1+1.0)) + r1;  /* d is an int in range [r1, r2] */
+    }
+#if LJ_DUALNUM
+    if (isint) {
+      setintV(L->top-1, lj_num2int(d));
+      return 1;
+    }
+#endif
+  }  /* else: d is a double in range [0, 1] */
+  setnumV(L->top++, d);
+  return 1;
+}
+
+/* PRNG seed function. */
+LJLIB_PUSH(top-2)  /* Upvalue holds userdata with RandomState. */
+LJLIB_CF(math_randomseed)
+{
+  RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+  random_init(rs, lj_lib_checknum(L, 1));
+  return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_math(lua_State *L)
+{
+  RandomState *rs;
+  rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
+  rs->valid = 0;  /* Use lazy initialization to save some time on startup. */
+  LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
+#if defined(LUA_COMPAT_MOD) && !LJ_52
+  lua_getfield(L, -1, "fmod");
+  lua_setfield(L, -2, "mod");
+#endif
+  return 1;
+}
+

+ 287 - 0
luajit.mod/luajit/src/lib_os.c

@@ -0,0 +1,287 @@
+/*
+** OS library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <errno.h>
+#include <time.h>
+
+#define lib_os_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_lib.h"
+
+#if LJ_TARGET_POSIX
+#include <unistd.h>
+#else
+#include <stdio.h>
+#endif
+
+#if !LJ_TARGET_PSVITA
+#include <locale.h>
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_os
+
+LJLIB_CF(os_execute)
+{
+#if LJ_TARGET_CONSOLE
+#if LJ_52
+  errno = ENOSYS;
+  return luaL_fileresult(L, 0, NULL);
+#else
+  lua_pushinteger(L, -1);
+  return 1;
+#endif
+#else
+  const char *cmd = luaL_optstring(L, 1, NULL);
+  int stat = system(cmd);
+#if LJ_52
+  if (cmd)
+    return luaL_execresult(L, stat);
+  setboolV(L->top++, 1);
+#else
+  setintV(L->top++, stat);
+#endif
+  return 1;
+#endif
+}
+
+LJLIB_CF(os_remove)
+{
+  const char *filename = luaL_checkstring(L, 1);
+  return luaL_fileresult(L, remove(filename) == 0, filename);
+}
+
+LJLIB_CF(os_rename)
+{
+  const char *fromname = luaL_checkstring(L, 1);
+  const char *toname = luaL_checkstring(L, 2);
+  return luaL_fileresult(L, rename(fromname, toname) == 0, fromname);
+}
+
+LJLIB_CF(os_tmpname)
+{
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+  lj_err_caller(L, LJ_ERR_OSUNIQF);
+  return 0;
+#else
+#if LJ_TARGET_POSIX
+  char buf[15+1];
+  int fp;
+  strcpy(buf, "/tmp/lua_XXXXXX");
+  fp = mkstemp(buf);
+  if (fp != -1)
+    close(fp);
+  else
+    lj_err_caller(L, LJ_ERR_OSUNIQF);
+#else
+  char buf[L_tmpnam];
+  if (tmpnam(buf) == NULL)
+    lj_err_caller(L, LJ_ERR_OSUNIQF);
+#endif
+  lua_pushstring(L, buf);
+  return 1;
+#endif
+}
+
+LJLIB_CF(os_getenv)
+{
+#if LJ_TARGET_CONSOLE
+  lua_pushnil(L);
+#else
+  lua_pushstring(L, getenv(luaL_checkstring(L, 1)));  /* if NULL push nil */
+#endif
+  return 1;
+}
+
+LJLIB_CF(os_exit)
+{
+  int status;
+  if (L->base < L->top && tvisbool(L->base))
+    status = boolV(L->base) ? EXIT_SUCCESS : EXIT_FAILURE;
+  else
+    status = lj_lib_optint(L, 1, EXIT_SUCCESS);
+  if (L->base+1 < L->top && tvistruecond(L->base+1))
+    lua_close(L);
+  exit(status);
+  return 0;  /* Unreachable. */
+}
+
+LJLIB_CF(os_clock)
+{
+  setnumV(L->top++, ((lua_Number)clock())*(1.0/(lua_Number)CLOCKS_PER_SEC));
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void setfield(lua_State *L, const char *key, int value)
+{
+  lua_pushinteger(L, value);
+  lua_setfield(L, -2, key);
+}
+
+static void setboolfield(lua_State *L, const char *key, int value)
+{
+  if (value < 0)  /* undefined? */
+    return;  /* does not set field */
+  lua_pushboolean(L, value);
+  lua_setfield(L, -2, key);
+}
+
+static int getboolfield(lua_State *L, const char *key)
+{
+  int res;
+  lua_getfield(L, -1, key);
+  res = lua_isnil(L, -1) ? -1 : lua_toboolean(L, -1);
+  lua_pop(L, 1);
+  return res;
+}
+
+static int getfield(lua_State *L, const char *key, int d)
+{
+  int res;
+  lua_getfield(L, -1, key);
+  if (lua_isnumber(L, -1)) {
+    res = (int)lua_tointeger(L, -1);
+  } else {
+    if (d < 0)
+      lj_err_callerv(L, LJ_ERR_OSDATEF, key);
+    res = d;
+  }
+  lua_pop(L, 1);
+  return res;
+}
+
+LJLIB_CF(os_date)
+{
+  const char *s = luaL_optstring(L, 1, "%c");
+  time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL));
+  struct tm *stm;
+#if LJ_TARGET_POSIX
+  struct tm rtm;
+#endif
+  if (*s == '!') {  /* UTC? */
+    s++;  /* Skip '!' */
+#if LJ_TARGET_POSIX
+    stm = gmtime_r(&t, &rtm);
+#else
+    stm = gmtime(&t);
+#endif
+  } else {
+#if LJ_TARGET_POSIX
+    stm = localtime_r(&t, &rtm);
+#else
+    stm = localtime(&t);
+#endif
+  }
+  if (stm == NULL) {  /* Invalid date? */
+    setnilV(L->top-1);
+  } else if (strcmp(s, "*t") == 0) {
+    lua_createtable(L, 0, 9);  /* 9 = number of fields */
+    setfield(L, "sec", stm->tm_sec);
+    setfield(L, "min", stm->tm_min);
+    setfield(L, "hour", stm->tm_hour);
+    setfield(L, "day", stm->tm_mday);
+    setfield(L, "month", stm->tm_mon+1);
+    setfield(L, "year", stm->tm_year+1900);
+    setfield(L, "wday", stm->tm_wday+1);
+    setfield(L, "yday", stm->tm_yday+1);
+    setboolfield(L, "isdst", stm->tm_isdst);
+  } else {
+    char cc[3];
+    luaL_Buffer b;
+    cc[0] = '%'; cc[2] = '\0';
+    luaL_buffinit(L, &b);
+    for (; *s; s++) {
+      if (*s != '%' || *(s + 1) == '\0') {  /* No conversion specifier? */
+	luaL_addchar(&b, *s);
+      } else {
+	size_t reslen;
+	char buff[200];  /* Should be big enough for any conversion result. */
+	cc[1] = *(++s);
+	reslen = strftime(buff, sizeof(buff), cc, stm);
+	luaL_addlstring(&b, buff, reslen);
+      }
+    }
+    luaL_pushresult(&b);
+  }
+  return 1;
+}
+
+LJLIB_CF(os_time)
+{
+  time_t t;
+  if (lua_isnoneornil(L, 1)) {  /* called without args? */
+    t = time(NULL);  /* get current time */
+  } else {
+    struct tm ts;
+    luaL_checktype(L, 1, LUA_TTABLE);
+    lua_settop(L, 1);  /* make sure table is at the top */
+    ts.tm_sec = getfield(L, "sec", 0);
+    ts.tm_min = getfield(L, "min", 0);
+    ts.tm_hour = getfield(L, "hour", 12);
+    ts.tm_mday = getfield(L, "day", -1);
+    ts.tm_mon = getfield(L, "month", -1) - 1;
+    ts.tm_year = getfield(L, "year", -1) - 1900;
+    ts.tm_isdst = getboolfield(L, "isdst");
+    t = mktime(&ts);
+  }
+  if (t == (time_t)(-1))
+    lua_pushnil(L);
+  else
+    lua_pushnumber(L, (lua_Number)t);
+  return 1;
+}
+
+LJLIB_CF(os_difftime)
+{
+  lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)),
+			     (time_t)(luaL_optnumber(L, 2, (lua_Number)0))));
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+LJLIB_CF(os_setlocale)
+{
+#if LJ_TARGET_PSVITA
+  lua_pushliteral(L, "C");
+#else
+  GCstr *s = lj_lib_optstr(L, 1);
+  const char *str = s ? strdata(s) : NULL;
+  int opt = lj_lib_checkopt(L, 2, 6,
+    "\5ctype\7numeric\4time\7collate\10monetary\1\377\3all");
+  if (opt == 0) opt = LC_CTYPE;
+  else if (opt == 1) opt = LC_NUMERIC;
+  else if (opt == 2) opt = LC_TIME;
+  else if (opt == 3) opt = LC_COLLATE;
+  else if (opt == 4) opt = LC_MONETARY;
+  else if (opt == 6) opt = LC_ALL;
+  lua_pushstring(L, setlocale(opt, str));
+#endif
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_os(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_OSLIBNAME, os);
+  return 1;
+}
+

+ 602 - 0
luajit.mod/luajit/src/lib_package.c

@@ -0,0 +1,602 @@
+/*
+** Package library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_package_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Error codes for ll_loadfunc. */
+#define PACKAGE_ERR_LIB		1
+#define PACKAGE_ERR_FUNC	2
+#define PACKAGE_ERR_LOAD	3
+
+/* Redefined in platform specific part. */
+#define PACKAGE_LIB_FAIL	"open"
+#define setprogdir(L)		((void)0)
+
+/* Symbol name prefixes. */
+#define SYMPREFIX_CF		"luaopen_%s"
+#define SYMPREFIX_BC		"luaJIT_BC_%s"
+
+#if LJ_TARGET_DLOPEN
+
+#include <dlfcn.h>
+
+static void ll_unloadlib(void *lib)
+{
+  dlclose(lib);
+}
+
+static void *ll_load(lua_State *L, const char *path, int gl)
+{
+  void *lib = dlopen(path, RTLD_NOW | (gl ? RTLD_GLOBAL : RTLD_LOCAL));
+  if (lib == NULL) lua_pushstring(L, dlerror());
+  return lib;
+}
+
+static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+{
+  lua_CFunction f = (lua_CFunction)dlsym(lib, sym);
+  if (f == NULL) lua_pushstring(L, dlerror());
+  return f;
+}
+
+static const char *ll_bcsym(void *lib, const char *sym)
+{
+#if defined(RTLD_DEFAULT)
+  if (lib == NULL) lib = RTLD_DEFAULT;
+#elif LJ_TARGET_OSX || LJ_TARGET_BSD
+  if (lib == NULL) lib = (void *)(intptr_t)-2;
+#endif
+  return (const char *)dlsym(lib, sym);
+}
+
+#elif LJ_TARGET_WINDOWS
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
+#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS  4
+#define GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT  2
+BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
+#endif
+
+#undef setprogdir
+
+static void setprogdir(lua_State *L)
+{
+  char buff[MAX_PATH + 1];
+  char *lb;
+  DWORD nsize = sizeof(buff);
+  DWORD n = GetModuleFileNameA(NULL, buff, nsize);
+  if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) {
+    luaL_error(L, "unable to get ModuleFileName");
+  } else {
+    *lb = '\0';
+    luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff);
+    lua_remove(L, -2);  /* remove original string */
+  }
+}
+
+static void pusherror(lua_State *L)
+{
+  DWORD error = GetLastError();
+  char buffer[128];
+  if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, error, 0, buffer, sizeof(buffer), NULL))
+    lua_pushstring(L, buffer);
+  else
+    lua_pushfstring(L, "system error %d\n", error);
+}
+
+static void ll_unloadlib(void *lib)
+{
+  FreeLibrary((HINSTANCE)lib);
+}
+
+static void *ll_load(lua_State *L, const char *path, int gl)
+{
+  HINSTANCE lib = LoadLibraryA(path);
+  if (lib == NULL) pusherror(L);
+  UNUSED(gl);
+  return lib;
+}
+
+static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+{
+  lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym);
+  if (f == NULL) pusherror(L);
+  return f;
+}
+
+static const char *ll_bcsym(void *lib, const char *sym)
+{
+  if (lib) {
+    return (const char *)GetProcAddress((HINSTANCE)lib, sym);
+  } else {
+    HINSTANCE h = GetModuleHandleA(NULL);
+    const char *p = (const char *)GetProcAddress(h, sym);
+    if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+					(const char *)ll_bcsym, &h))
+      p = (const char *)GetProcAddress(h, sym);
+    return p;
+  }
+}
+
+#else
+
+#undef PACKAGE_LIB_FAIL
+#define PACKAGE_LIB_FAIL	"absent"
+
+#define DLMSG	"dynamic libraries not enabled; no support for target OS"
+
+static void ll_unloadlib(void *lib)
+{
+  UNUSED(lib);
+}
+
+static void *ll_load(lua_State *L, const char *path, int gl)
+{
+  UNUSED(path); UNUSED(gl);
+  lua_pushliteral(L, DLMSG);
+  return NULL;
+}
+
+static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+{
+  UNUSED(lib); UNUSED(sym);
+  lua_pushliteral(L, DLMSG);
+  return NULL;
+}
+
+static const char *ll_bcsym(void *lib, const char *sym)
+{
+  UNUSED(lib); UNUSED(sym);
+  return NULL;
+}
+
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+static void **ll_register(lua_State *L, const char *path)
+{
+  void **plib;
+  lua_pushfstring(L, "LOADLIB: %s", path);
+  lua_gettable(L, LUA_REGISTRYINDEX);  /* check library in registry? */
+  if (!lua_isnil(L, -1)) {  /* is there an entry? */
+    plib = (void **)lua_touserdata(L, -1);
+  } else {  /* no entry yet; create one */
+    lua_pop(L, 1);
+    plib = (void **)lua_newuserdata(L, sizeof(void *));
+    *plib = NULL;
+    luaL_getmetatable(L, "_LOADLIB");
+    lua_setmetatable(L, -2);
+    lua_pushfstring(L, "LOADLIB: %s", path);
+    lua_pushvalue(L, -2);
+    lua_settable(L, LUA_REGISTRYINDEX);
+  }
+  return plib;
+}
+
+static const char *mksymname(lua_State *L, const char *modname,
+			     const char *prefix)
+{
+  const char *funcname;
+  const char *mark = strchr(modname, *LUA_IGMARK);
+  if (mark) modname = mark + 1;
+  funcname = luaL_gsub(L, modname, ".", "_");
+  funcname = lua_pushfstring(L, prefix, funcname);
+  lua_remove(L, -2);  /* remove 'gsub' result */
+  return funcname;
+}
+
+static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
+{
+  void **reg = ll_register(L, path);
+  if (*reg == NULL) *reg = ll_load(L, path, (*name == '*'));
+  if (*reg == NULL) {
+    return PACKAGE_ERR_LIB;  /* Unable to load library. */
+  } else if (*name == '*') {  /* Only load library into global namespace. */
+    lua_pushboolean(L, 1);
+    return 0;
+  } else {
+    const char *sym = r ? name : mksymname(L, name, SYMPREFIX_CF);
+    lua_CFunction f = ll_sym(L, *reg, sym);
+    if (f) {
+      lua_pushcfunction(L, f);
+      return 0;
+    }
+    if (!r) {
+      const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
+      lua_pop(L, 1);
+      if (bcdata) {
+	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+	  return PACKAGE_ERR_LOAD;
+	return 0;
+      }
+    }
+    return PACKAGE_ERR_FUNC;  /* Unable to find function. */
+  }
+}
+
+static int lj_cf_package_loadlib(lua_State *L)
+{
+  const char *path = luaL_checkstring(L, 1);
+  const char *init = luaL_checkstring(L, 2);
+  int st = ll_loadfunc(L, path, init, 1);
+  if (st == 0) {  /* no errors? */
+    return 1;  /* return the loaded function */
+  } else {  /* error; error message is on stack top */
+    lua_pushnil(L);
+    lua_insert(L, -2);
+    lua_pushstring(L, (st == PACKAGE_ERR_LIB) ?  PACKAGE_LIB_FAIL : "init");
+    return 3;  /* return nil, error message, and where */
+  }
+}
+
+static int lj_cf_package_unloadlib(lua_State *L)
+{
+  void **lib = (void **)luaL_checkudata(L, 1, "_LOADLIB");
+  if (*lib) ll_unloadlib(*lib);
+  *lib = NULL;  /* mark library as closed */
+  return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static int readable(const char *filename)
+{
+  FILE *f = fopen(filename, "r");  /* try to open file */
+  if (f == NULL) return 0;  /* open failed */
+  fclose(f);
+  return 1;
+}
+
+static const char *pushnexttemplate(lua_State *L, const char *path)
+{
+  const char *l;
+  while (*path == *LUA_PATHSEP) path++;  /* skip separators */
+  if (*path == '\0') return NULL;  /* no more templates */
+  l = strchr(path, *LUA_PATHSEP);  /* find next separator */
+  if (l == NULL) l = path + strlen(path);
+  lua_pushlstring(L, path, (size_t)(l - path));  /* template */
+  return l;
+}
+
+static const char *searchpath (lua_State *L, const char *name,
+			       const char *path, const char *sep,
+			       const char *dirsep)
+{
+  luaL_Buffer msg;  /* to build error message */
+  luaL_buffinit(L, &msg);
+  if (*sep != '\0')  /* non-empty separator? */
+    name = luaL_gsub(L, name, sep, dirsep);  /* replace it by 'dirsep' */
+  while ((path = pushnexttemplate(L, path)) != NULL) {
+    const char *filename = luaL_gsub(L, lua_tostring(L, -1),
+				     LUA_PATH_MARK, name);
+    lua_remove(L, -2);  /* remove path template */
+    if (readable(filename))  /* does file exist and is readable? */
+      return filename;  /* return that file name */
+    lua_pushfstring(L, "\n\tno file " LUA_QS, filename);
+    lua_remove(L, -2);  /* remove file name */
+    luaL_addvalue(&msg);  /* concatenate error msg. entry */
+  }
+  luaL_pushresult(&msg);  /* create error message */
+  return NULL;  /* not found */
+}
+
+static int lj_cf_package_searchpath(lua_State *L)
+{
+  const char *f = searchpath(L, luaL_checkstring(L, 1),
+				luaL_checkstring(L, 2),
+				luaL_optstring(L, 3, "."),
+				luaL_optstring(L, 4, LUA_DIRSEP));
+  if (f != NULL) {
+    return 1;
+  } else {  /* error message is on top of the stack */
+    lua_pushnil(L);
+    lua_insert(L, -2);
+    return 2;  /* return nil + error message */
+  }
+}
+
+static const char *findfile(lua_State *L, const char *name,
+			    const char *pname)
+{
+  const char *path;
+  lua_getfield(L, LUA_ENVIRONINDEX, pname);
+  path = lua_tostring(L, -1);
+  if (path == NULL)
+    luaL_error(L, LUA_QL("package.%s") " must be a string", pname);
+  return searchpath(L, name, path, ".", LUA_DIRSEP);
+}
+
+static void loaderror(lua_State *L, const char *filename)
+{
+  luaL_error(L, "error loading module " LUA_QS " from file " LUA_QS ":\n\t%s",
+	     lua_tostring(L, 1), filename, lua_tostring(L, -1));
+}
+
+static int lj_cf_package_loader_lua(lua_State *L)
+{
+  const char *filename;
+  const char *name = luaL_checkstring(L, 1);
+  filename = findfile(L, name, "path");
+  if (filename == NULL) return 1;  /* library not found in this path */
+  if (luaL_loadfile(L, filename) != 0)
+    loaderror(L, filename);
+  return 1;  /* library loaded successfully */
+}
+
+static int lj_cf_package_loader_c(lua_State *L)
+{
+  const char *name = luaL_checkstring(L, 1);
+  const char *filename = findfile(L, name, "cpath");
+  if (filename == NULL) return 1;  /* library not found in this path */
+  if (ll_loadfunc(L, filename, name, 0) != 0)
+    loaderror(L, filename);
+  return 1;  /* library loaded successfully */
+}
+
+static int lj_cf_package_loader_croot(lua_State *L)
+{
+  const char *filename;
+  const char *name = luaL_checkstring(L, 1);
+  const char *p = strchr(name, '.');
+  int st;
+  if (p == NULL) return 0;  /* is root */
+  lua_pushlstring(L, name, (size_t)(p - name));
+  filename = findfile(L, lua_tostring(L, -1), "cpath");
+  if (filename == NULL) return 1;  /* root not found */
+  if ((st = ll_loadfunc(L, filename, name, 0)) != 0) {
+    if (st != PACKAGE_ERR_FUNC) loaderror(L, filename);  /* real error */
+    lua_pushfstring(L, "\n\tno module " LUA_QS " in file " LUA_QS,
+		    name, filename);
+    return 1;  /* function not found */
+  }
+  return 1;
+}
+
+static int lj_cf_package_loader_preload(lua_State *L)
+{
+  const char *name = luaL_checkstring(L, 1);
+  lua_getfield(L, LUA_ENVIRONINDEX, "preload");
+  if (!lua_istable(L, -1))
+    luaL_error(L, LUA_QL("package.preload") " must be a table");
+  lua_getfield(L, -1, name);
+  if (lua_isnil(L, -1)) {  /* Not found? */
+    const char *bcname = mksymname(L, name, SYMPREFIX_BC);
+    const char *bcdata = ll_bcsym(NULL, bcname);
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+      lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
+  }
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static const int sentinel_ = 0;
+#define sentinel	((void *)&sentinel_)
+
+static int lj_cf_package_require(lua_State *L)
+{
+  const char *name = luaL_checkstring(L, 1);
+  int i;
+  lua_settop(L, 1);  /* _LOADED table will be at index 2 */
+  lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+  lua_getfield(L, 2, name);
+  if (lua_toboolean(L, -1)) {  /* is it there? */
+    if (lua_touserdata(L, -1) == sentinel)  /* check loops */
+      luaL_error(L, "loop or previous error loading module " LUA_QS, name);
+    return 1;  /* package is already loaded */
+  }
+  /* else must load it; iterate over available loaders */
+  lua_getfield(L, LUA_ENVIRONINDEX, "loaders");
+  if (!lua_istable(L, -1))
+    luaL_error(L, LUA_QL("package.loaders") " must be a table");
+  lua_pushliteral(L, "");  /* error message accumulator */
+  for (i = 1; ; i++) {
+    lua_rawgeti(L, -2, i);  /* get a loader */
+    if (lua_isnil(L, -1))
+      luaL_error(L, "module " LUA_QS " not found:%s",
+		 name, lua_tostring(L, -2));
+    lua_pushstring(L, name);
+    lua_call(L, 1, 1);  /* call it */
+    if (lua_isfunction(L, -1))  /* did it find module? */
+      break;  /* module loaded successfully */
+    else if (lua_isstring(L, -1))  /* loader returned error message? */
+      lua_concat(L, 2);  /* accumulate it */
+    else
+      lua_pop(L, 1);
+  }
+  lua_pushlightuserdata(L, sentinel);
+  lua_setfield(L, 2, name);  /* _LOADED[name] = sentinel */
+  lua_pushstring(L, name);  /* pass name as argument to module */
+  lua_call(L, 1, 1);  /* run loaded module */
+  if (!lua_isnil(L, -1))  /* non-nil return? */
+    lua_setfield(L, 2, name);  /* _LOADED[name] = returned value */
+  lua_getfield(L, 2, name);
+  if (lua_touserdata(L, -1) == sentinel) {   /* module did not set a value? */
+    lua_pushboolean(L, 1);  /* use true as result */
+    lua_pushvalue(L, -1);  /* extra copy to be returned */
+    lua_setfield(L, 2, name);  /* _LOADED[name] = true */
+  }
+  lj_lib_checkfpu(L);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void setfenv(lua_State *L)
+{
+  lua_Debug ar;
+  if (lua_getstack(L, 1, &ar) == 0 ||
+      lua_getinfo(L, "f", &ar) == 0 ||  /* get calling function */
+      lua_iscfunction(L, -1))
+    luaL_error(L, LUA_QL("module") " not called from a Lua function");
+  lua_pushvalue(L, -2);
+  lua_setfenv(L, -2);
+  lua_pop(L, 1);
+}
+
+static void dooptions(lua_State *L, int n)
+{
+  int i;
+  for (i = 2; i <= n; i++) {
+    lua_pushvalue(L, i);  /* get option (a function) */
+    lua_pushvalue(L, -2);  /* module */
+    lua_call(L, 1, 0);
+  }
+}
+
+static void modinit(lua_State *L, const char *modname)
+{
+  const char *dot;
+  lua_pushvalue(L, -1);
+  lua_setfield(L, -2, "_M");  /* module._M = module */
+  lua_pushstring(L, modname);
+  lua_setfield(L, -2, "_NAME");
+  dot = strrchr(modname, '.');  /* look for last dot in module name */
+  if (dot == NULL) dot = modname; else dot++;
+  /* set _PACKAGE as package name (full module name minus last part) */
+  lua_pushlstring(L, modname, (size_t)(dot - modname));
+  lua_setfield(L, -2, "_PACKAGE");
+}
+
+static int lj_cf_package_module(lua_State *L)
+{
+  const char *modname = luaL_checkstring(L, 1);
+  int loaded = lua_gettop(L) + 1;  /* index of _LOADED table */
+  lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+  lua_getfield(L, loaded, modname);  /* get _LOADED[modname] */
+  if (!lua_istable(L, -1)) {  /* not found? */
+    lua_pop(L, 1);  /* remove previous result */
+    /* try global variable (and create one if it does not exist) */
+    if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
+      lj_err_callerv(L, LJ_ERR_BADMODN, modname);
+    lua_pushvalue(L, -1);
+    lua_setfield(L, loaded, modname);  /* _LOADED[modname] = new table */
+  }
+  /* check whether table already has a _NAME field */
+  lua_getfield(L, -1, "_NAME");
+  if (!lua_isnil(L, -1)) {  /* is table an initialized module? */
+    lua_pop(L, 1);
+  } else {  /* no; initialize it */
+    lua_pop(L, 1);
+    modinit(L, modname);
+  }
+  lua_pushvalue(L, -1);
+  setfenv(L);
+  dooptions(L, loaded - 1);
+  return 0;
+}
+
+static int lj_cf_package_seeall(lua_State *L)
+{
+  luaL_checktype(L, 1, LUA_TTABLE);
+  if (!lua_getmetatable(L, 1)) {
+    lua_createtable(L, 0, 1); /* create new metatable */
+    lua_pushvalue(L, -1);
+    lua_setmetatable(L, 1);
+  }
+  lua_pushvalue(L, LUA_GLOBALSINDEX);
+  lua_setfield(L, -2, "__index");  /* mt.__index = _G */
+  return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#define AUXMARK		"\1"
+
+static void setpath(lua_State *L, const char *fieldname, const char *envname,
+		    const char *def, int noenv)
+{
+#if LJ_TARGET_CONSOLE
+  const char *path = NULL;
+  UNUSED(envname);
+#else
+  const char *path = getenv(envname);
+#endif
+  if (path == NULL || noenv) {
+    lua_pushstring(L, def);
+  } else {
+    path = luaL_gsub(L, path, LUA_PATHSEP LUA_PATHSEP,
+			      LUA_PATHSEP AUXMARK LUA_PATHSEP);
+    luaL_gsub(L, path, AUXMARK, def);
+    lua_remove(L, -2);
+  }
+  setprogdir(L);
+  lua_setfield(L, -2, fieldname);
+}
+
+static const luaL_Reg package_lib[] = {
+  { "loadlib",	lj_cf_package_loadlib },
+  { "searchpath",  lj_cf_package_searchpath },
+  { "seeall",	lj_cf_package_seeall },
+  { NULL, NULL }
+};
+
+static const luaL_Reg package_global[] = {
+  { "module",	lj_cf_package_module },
+  { "require",	lj_cf_package_require },
+  { NULL, NULL }
+};
+
+static const lua_CFunction package_loaders[] =
+{
+  lj_cf_package_loader_preload,
+  lj_cf_package_loader_lua,
+  lj_cf_package_loader_c,
+  lj_cf_package_loader_croot,
+  NULL
+};
+
+LUALIB_API int luaopen_package(lua_State *L)
+{
+  int i;
+  int noenv;
+  luaL_newmetatable(L, "_LOADLIB");
+  lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
+  lua_setfield(L, -2, "__gc");
+  luaL_register(L, LUA_LOADLIBNAME, package_lib);
+  lua_pushvalue(L, -1);
+  lua_replace(L, LUA_ENVIRONINDEX);
+  lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
+  for (i = 0; package_loaders[i] != NULL; i++) {
+    lj_lib_pushcf(L, package_loaders[i], 1);
+    lua_rawseti(L, -2, i+1);
+  }
+  lua_setfield(L, -2, "loaders");
+  lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
+  noenv = lua_toboolean(L, -1);
+  lua_pop(L, 1);
+  setpath(L, "path", LUA_PATH, LUA_PATH_DEFAULT, noenv);
+  setpath(L, "cpath", LUA_CPATH, LUA_CPATH_DEFAULT, noenv);
+  lua_pushliteral(L, LUA_PATH_CONFIG);
+  lua_setfield(L, -2, "config");
+  luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+  lua_setfield(L, -2, "loaded");
+  luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
+  lua_setfield(L, -2, "preload");
+  lua_pushvalue(L, LUA_GLOBALSINDEX);
+  luaL_register(L, NULL, package_global);
+  lua_pop(L, 1);
+  return 1;
+}
+

+ 940 - 0
luajit.mod/luajit/src/lib_string.c

@@ -0,0 +1,940 @@
+/*
+** String library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <stdio.h>
+
+#define lib_string_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_ff.h"
+#include "lj_bcdump.h"
+#include "lj_char.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_string
+
+LJLIB_ASM(string_len)		LJLIB_REC(.)
+{
+  lj_lib_checkstr(L, 1);
+  return FFH_RETRY;
+}
+
+LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
+{
+  GCstr *s = lj_lib_checkstr(L, 1);
+  int32_t len = (int32_t)s->len;
+  int32_t start = lj_lib_optint(L, 2, 1);
+  int32_t stop = lj_lib_optint(L, 3, start);
+  int32_t n, i;
+  const unsigned char *p;
+  if (stop < 0) stop += len+1;
+  if (start < 0) start += len+1;
+  if (start <= 0) start = 1;
+  if (stop > len) stop = len;
+  if (start > stop) return FFH_RES(0);  /* Empty interval: return no results. */
+  start--;
+  n = stop - start;
+  if ((uint32_t)n > LUAI_MAXCSTACK)
+    lj_err_caller(L, LJ_ERR_STRSLC);
+  lj_state_checkstack(L, (MSize)n);
+  p = (const unsigned char *)strdata(s) + start;
+  for (i = 0; i < n; i++)
+    setintV(L->base + i-1, p[i]);
+  return FFH_RES(n);
+}
+
+LJLIB_ASM(string_char)
+{
+  int i, nargs = (int)(L->top - L->base);
+  char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs);
+  for (i = 1; i <= nargs; i++) {
+    int32_t k = lj_lib_checkint(L, i);
+    if (!checku8(k))
+      lj_err_arg(L, i, LJ_ERR_BADVAL);
+    buf[i-1] = (char)k;
+  }
+  setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+  return FFH_RES(1);
+}
+
+LJLIB_ASM(string_sub)		LJLIB_REC(string_range 1)
+{
+  lj_lib_checkstr(L, 1);
+  lj_lib_checkint(L, 2);
+  setintV(L->base+2, lj_lib_optint(L, 3, -1));
+  return FFH_RETRY;
+}
+
+LJLIB_ASM(string_rep)
+{
+  GCstr *s = lj_lib_checkstr(L, 1);
+  int32_t k = lj_lib_checkint(L, 2);
+  GCstr *sep = lj_lib_optstr(L, 3);
+  int32_t len = (int32_t)s->len;
+  global_State *g = G(L);
+  int64_t tlen;
+  const char *src;
+  char *buf;
+  if (k <= 0) {
+  empty:
+    setstrV(L, L->base-1, &g->strempty);
+    return FFH_RES(1);
+  }
+  if (sep) {
+    tlen = (int64_t)len + sep->len;
+    if (tlen > LJ_MAX_STR)
+      lj_err_caller(L, LJ_ERR_STROV);
+    tlen *= k;
+    if (tlen > LJ_MAX_STR)
+      lj_err_caller(L, LJ_ERR_STROV);
+  } else {
+    tlen = (int64_t)k * len;
+    if (tlen > LJ_MAX_STR)
+      lj_err_caller(L, LJ_ERR_STROV);
+  }
+  if (tlen == 0) goto empty;
+  buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
+  src = strdata(s);
+  if (sep) {
+    tlen -= sep->len;  /* Ignore trailing separator. */
+    if (k > 1) {  /* Paste one string and one separator. */
+      int32_t i;
+      i = 0; while (i < len) *buf++ = src[i++];
+      src = strdata(sep); len = sep->len;
+      i = 0; while (i < len) *buf++ = src[i++];
+      src = g->tmpbuf.buf; len += s->len; k--;  /* Now copy that k-1 times. */
+    }
+  }
+  do {
+    int32_t i = 0;
+    do { *buf++ = src[i++]; } while (i < len);
+  } while (--k > 0);
+  setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
+  return FFH_RES(1);
+}
+
+LJLIB_ASM(string_reverse)
+{
+  GCstr *s = lj_lib_checkstr(L, 1);
+  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(string_lower)
+LJLIB_ASM_(string_upper)
+
+/* ------------------------------------------------------------------------ */
+
+static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+{
+  luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+  UNUSED(L);
+  return 0;
+}
+
+LJLIB_CF(string_dump)
+{
+  GCfunc *fn = lj_lib_checkfunc(L, 1);
+  int strip = L->base+1 < L->top && tvistruecond(L->base+1);
+  luaL_Buffer b;
+  L->top = L->base+1;
+  luaL_buffinit(L, &b);
+  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+    lj_err_caller(L, LJ_ERR_STRDUMP);
+  luaL_pushresult(&b);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* macro to `unsign' a character */
+#define uchar(c)        ((unsigned char)(c))
+
+#define CAP_UNFINISHED	(-1)
+#define CAP_POSITION	(-2)
+
+typedef struct MatchState {
+  const char *src_init;  /* init of source string */
+  const char *src_end;  /* end (`\0') of source string */
+  lua_State *L;
+  int level;  /* total number of captures (finished or unfinished) */
+  int depth;
+  struct {
+    const char *init;
+    ptrdiff_t len;
+  } capture[LUA_MAXCAPTURES];
+} MatchState;
+
+#define L_ESC		'%'
+#define SPECIALS	"^$*+?.([%-"
+
+static int check_capture(MatchState *ms, int l)
+{
+  l -= '1';
+  if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
+    lj_err_caller(ms->L, LJ_ERR_STRCAPI);
+  return l;
+}
+
+static int capture_to_close(MatchState *ms)
+{
+  int level = ms->level;
+  for (level--; level>=0; level--)
+    if (ms->capture[level].len == CAP_UNFINISHED) return level;
+  lj_err_caller(ms->L, LJ_ERR_STRPATC);
+  return 0;  /* unreachable */
+}
+
+static const char *classend(MatchState *ms, const char *p)
+{
+  switch (*p++) {
+  case L_ESC:
+    if (*p == '\0')
+      lj_err_caller(ms->L, LJ_ERR_STRPATE);
+    return p+1;
+  case '[':
+    if (*p == '^') p++;
+    do {  /* look for a `]' */
+      if (*p == '\0')
+	lj_err_caller(ms->L, LJ_ERR_STRPATM);
+      if (*(p++) == L_ESC && *p != '\0')
+	p++;  /* skip escapes (e.g. `%]') */
+    } while (*p != ']');
+    return p+1;
+  default:
+    return p;
+  }
+}
+
+static const unsigned char match_class_map[32] = {
+  0,LJ_CHAR_ALPHA,0,LJ_CHAR_CNTRL,LJ_CHAR_DIGIT,0,0,LJ_CHAR_GRAPH,0,0,0,0,
+  LJ_CHAR_LOWER,0,0,0,LJ_CHAR_PUNCT,0,0,LJ_CHAR_SPACE,0,
+  LJ_CHAR_UPPER,0,LJ_CHAR_ALNUM,LJ_CHAR_XDIGIT,0,0,0,0,0,0,0
+};
+
+static int match_class(int c, int cl)
+{
+  if ((cl & 0xc0) == 0x40) {
+    int t = match_class_map[(cl&0x1f)];
+    if (t) {
+      t = lj_char_isa(c, t);
+      return (cl & 0x20) ? t : !t;
+    }
+    if (cl == 'z') return c == 0;
+    if (cl == 'Z') return c != 0;
+  }
+  return (cl == c);
+}
+
+static int matchbracketclass(int c, const char *p, const char *ec)
+{
+  int sig = 1;
+  if (*(p+1) == '^') {
+    sig = 0;
+    p++;  /* skip the `^' */
+  }
+  while (++p < ec) {
+    if (*p == L_ESC) {
+      p++;
+      if (match_class(c, uchar(*p)))
+	return sig;
+    }
+    else if ((*(p+1) == '-') && (p+2 < ec)) {
+      p+=2;
+      if (uchar(*(p-2)) <= c && c <= uchar(*p))
+	return sig;
+    }
+    else if (uchar(*p) == c) return sig;
+  }
+  return !sig;
+}
+
+static int singlematch(int c, const char *p, const char *ep)
+{
+  switch (*p) {
+  case '.': return 1;  /* matches any char */
+  case L_ESC: return match_class(c, uchar(*(p+1)));
+  case '[': return matchbracketclass(c, p, ep-1);
+  default:  return (uchar(*p) == c);
+  }
+}
+
+static const char *match(MatchState *ms, const char *s, const char *p);
+
+static const char *matchbalance(MatchState *ms, const char *s, const char *p)
+{
+  if (*p == 0 || *(p+1) == 0)
+    lj_err_caller(ms->L, LJ_ERR_STRPATU);
+  if (*s != *p) {
+    return NULL;
+  } else {
+    int b = *p;
+    int e = *(p+1);
+    int cont = 1;
+    while (++s < ms->src_end) {
+      if (*s == e) {
+	if (--cont == 0) return s+1;
+      } else if (*s == b) {
+	cont++;
+      }
+    }
+  }
+  return NULL;  /* string ends out of balance */
+}
+
+static const char *max_expand(MatchState *ms, const char *s,
+			      const char *p, const char *ep)
+{
+  ptrdiff_t i = 0;  /* counts maximum expand for item */
+  while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep))
+    i++;
+  /* keeps trying to match with the maximum repetitions */
+  while (i>=0) {
+    const char *res = match(ms, (s+i), ep+1);
+    if (res) return res;
+    i--;  /* else didn't match; reduce 1 repetition to try again */
+  }
+  return NULL;
+}
+
+static const char *min_expand(MatchState *ms, const char *s,
+			      const char *p, const char *ep)
+{
+  for (;;) {
+    const char *res = match(ms, s, ep+1);
+    if (res != NULL)
+      return res;
+    else if (s<ms->src_end && singlematch(uchar(*s), p, ep))
+      s++;  /* try with one more repetition */
+    else
+      return NULL;
+  }
+}
+
+static const char *start_capture(MatchState *ms, const char *s,
+				 const char *p, int what)
+{
+  const char *res;
+  int level = ms->level;
+  if (level >= LUA_MAXCAPTURES) lj_err_caller(ms->L, LJ_ERR_STRCAPN);
+  ms->capture[level].init = s;
+  ms->capture[level].len = what;
+  ms->level = level+1;
+  if ((res=match(ms, s, p)) == NULL)  /* match failed? */
+    ms->level--;  /* undo capture */
+  return res;
+}
+
+static const char *end_capture(MatchState *ms, const char *s,
+			       const char *p)
+{
+  int l = capture_to_close(ms);
+  const char *res;
+  ms->capture[l].len = s - ms->capture[l].init;  /* close capture */
+  if ((res = match(ms, s, p)) == NULL)  /* match failed? */
+    ms->capture[l].len = CAP_UNFINISHED;  /* undo capture */
+  return res;
+}
+
+static const char *match_capture(MatchState *ms, const char *s, int l)
+{
+  size_t len;
+  l = check_capture(ms, l);
+  len = (size_t)ms->capture[l].len;
+  if ((size_t)(ms->src_end-s) >= len &&
+      memcmp(ms->capture[l].init, s, len) == 0)
+    return s+len;
+  else
+    return NULL;
+}
+
+static const char *match(MatchState *ms, const char *s, const char *p)
+{
+  if (++ms->depth > LJ_MAX_XLEVEL)
+    lj_err_caller(ms->L, LJ_ERR_STRPATX);
+  init: /* using goto's to optimize tail recursion */
+  switch (*p) {
+  case '(':  /* start capture */
+    if (*(p+1) == ')')  /* position capture? */
+      s = start_capture(ms, s, p+2, CAP_POSITION);
+    else
+      s = start_capture(ms, s, p+1, CAP_UNFINISHED);
+    break;
+  case ')':  /* end capture */
+    s = end_capture(ms, s, p+1);
+    break;
+  case L_ESC:
+    switch (*(p+1)) {
+    case 'b':  /* balanced string? */
+      s = matchbalance(ms, s, p+2);
+      if (s == NULL) break;
+      p+=4;
+      goto init;  /* else s = match(ms, s, p+4); */
+    case 'f': {  /* frontier? */
+      const char *ep; char previous;
+      p += 2;
+      if (*p != '[')
+	lj_err_caller(ms->L, LJ_ERR_STRPATB);
+      ep = classend(ms, p);  /* points to what is next */
+      previous = (s == ms->src_init) ? '\0' : *(s-1);
+      if (matchbracketclass(uchar(previous), p, ep-1) ||
+	 !matchbracketclass(uchar(*s), p, ep-1)) { s = NULL; break; }
+      p=ep;
+      goto init;  /* else s = match(ms, s, ep); */
+      }
+    default:
+      if (lj_char_isdigit(uchar(*(p+1)))) {  /* capture results (%0-%9)? */
+	s = match_capture(ms, s, uchar(*(p+1)));
+	if (s == NULL) break;
+	p+=2;
+	goto init;  /* else s = match(ms, s, p+2) */
+      }
+      goto dflt;  /* case default */
+    }
+    break;
+  case '\0':  /* end of pattern */
+    break;  /* match succeeded */
+  case '$':
+    /* is the `$' the last char in pattern? */
+    if (*(p+1) != '\0') goto dflt;
+    if (s != ms->src_end) s = NULL;  /* check end of string */
+    break;
+  default: dflt: {  /* it is a pattern item */
+    const char *ep = classend(ms, p);  /* points to what is next */
+    int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
+    switch (*ep) {
+    case '?': {  /* optional */
+      const char *res;
+      if (m && ((res=match(ms, s+1, ep+1)) != NULL)) {
+	s = res;
+	break;
+      }
+      p=ep+1;
+      goto init;  /* else s = match(ms, s, ep+1); */
+      }
+    case '*':  /* 0 or more repetitions */
+      s = max_expand(ms, s, p, ep);
+      break;
+    case '+':  /* 1 or more repetitions */
+      s = (m ? max_expand(ms, s+1, p, ep) : NULL);
+      break;
+    case '-':  /* 0 or more repetitions (minimum) */
+      s = min_expand(ms, s, p, ep);
+      break;
+    default:
+      if (m) { s++; p=ep; goto init; }  /* else s = match(ms, s+1, ep); */
+      s = NULL;
+      break;
+    }
+    break;
+    }
+  }
+  ms->depth--;
+  return s;
+}
+
+static const char *lmemfind(const char *s1, size_t l1,
+			    const char *s2, size_t l2)
+{
+  if (l2 == 0) {
+    return s1;  /* empty strings are everywhere */
+  } else if (l2 > l1) {
+    return NULL;  /* avoids a negative `l1' */
+  } else {
+    const char *init;  /* to search for a `*s2' inside `s1' */
+    l2--;  /* 1st char will be checked by `memchr' */
+    l1 = l1-l2;  /* `s2' cannot be found after that */
+    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
+      init++;   /* 1st char is already checked */
+      if (memcmp(init, s2+1, l2) == 0) {
+	return init-1;
+      } else {  /* correct `l1' and `s1' to try again */
+	l1 -= (size_t)(init-s1);
+	s1 = init;
+      }
+    }
+    return NULL;  /* not found */
+  }
+}
+
+static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
+{
+  if (i >= ms->level) {
+    if (i == 0)  /* ms->level == 0, too */
+      lua_pushlstring(ms->L, s, (size_t)(e - s));  /* add whole match */
+    else
+      lj_err_caller(ms->L, LJ_ERR_STRCAPI);
+  } else {
+    ptrdiff_t l = ms->capture[i].len;
+    if (l == CAP_UNFINISHED) lj_err_caller(ms->L, LJ_ERR_STRCAPU);
+    if (l == CAP_POSITION)
+      lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1);
+    else
+      lua_pushlstring(ms->L, ms->capture[i].init, (size_t)l);
+  }
+}
+
+static int push_captures(MatchState *ms, const char *s, const char *e)
+{
+  int i;
+  int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
+  luaL_checkstack(ms->L, nlevels, "too many captures");
+  for (i = 0; i < nlevels; i++)
+    push_onecapture(ms, i, s, e);
+  return nlevels;  /* number of strings pushed */
+}
+
+static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
+{
+  /* relative string position: negative means back from end */
+  if (pos < 0) pos += (ptrdiff_t)len + 1;
+  return (pos >= 0) ? pos : 0;
+}
+
+static int str_find_aux(lua_State *L, int find)
+{
+  size_t l1, l2;
+  const char *s = luaL_checklstring(L, 1, &l1);
+  const char *p = luaL_checklstring(L, 2, &l2);
+  ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
+  if (init < 0) {
+    init = 0;
+  } else if ((size_t)(init) > l1) {
+#if LJ_52
+    setnilV(L->top-1);
+    return 1;
+#else
+    init = (ptrdiff_t)l1;
+#endif
+  }
+  if (find && (lua_toboolean(L, 4) ||  /* explicit request? */
+      strpbrk(p, SPECIALS) == NULL)) {  /* or no special characters? */
+    /* do a plain search */
+    const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
+    if (s2) {
+      lua_pushinteger(L, s2-s+1);
+      lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+      return 2;
+    }
+  } else {
+    MatchState ms;
+    int anchor = (*p == '^') ? (p++, 1) : 0;
+    const char *s1=s+init;
+    ms.L = L;
+    ms.src_init = s;
+    ms.src_end = s+l1;
+    do {
+      const char *res;
+      ms.level = ms.depth = 0;
+      if ((res=match(&ms, s1, p)) != NULL) {
+	if (find) {
+	  lua_pushinteger(L, s1-s+1);  /* start */
+	  lua_pushinteger(L, res-s);   /* end */
+	  return push_captures(&ms, NULL, 0) + 2;
+	} else {
+	  return push_captures(&ms, s1, res);
+	}
+      }
+    } while (s1++ < ms.src_end && !anchor);
+  }
+  lua_pushnil(L);  /* not found */
+  return 1;
+}
+
+LJLIB_CF(string_find)
+{
+  return str_find_aux(L, 1);
+}
+
+LJLIB_CF(string_match)
+{
+  return str_find_aux(L, 0);
+}
+
+LJLIB_NOREG LJLIB_CF(string_gmatch_aux)
+{
+  const char *p = strVdata(lj_lib_upvalue(L, 2));
+  GCstr *str = strV(lj_lib_upvalue(L, 1));
+  const char *s = strdata(str);
+  TValue *tvpos = lj_lib_upvalue(L, 3);
+  const char *src = s + tvpos->u32.lo;
+  MatchState ms;
+  ms.L = L;
+  ms.src_init = s;
+  ms.src_end = s + str->len;
+  for (; src <= ms.src_end; src++) {
+    const char *e;
+    ms.level = ms.depth = 0;
+    if ((e = match(&ms, src, p)) != NULL) {
+      int32_t pos = (int32_t)(e - s);
+      if (e == src) pos++;  /* Ensure progress for empty match. */
+      tvpos->u32.lo = (uint32_t)pos;
+      return push_captures(&ms, src, e);
+    }
+  }
+  return 0;  /* not found */
+}
+
+LJLIB_CF(string_gmatch)
+{
+  lj_lib_checkstr(L, 1);
+  lj_lib_checkstr(L, 2);
+  L->top = L->base+3;
+  (L->top-1)->u64 = 0;
+  lj_lib_pushcc(L, lj_cf_string_gmatch_aux, FF_string_gmatch_aux, 3);
+  return 1;
+}
+
+static void add_s(MatchState *ms, luaL_Buffer *b, const char *s, const char *e)
+{
+  size_t l, i;
+  const char *news = lua_tolstring(ms->L, 3, &l);
+  for (i = 0; i < l; i++) {
+    if (news[i] != L_ESC) {
+      luaL_addchar(b, news[i]);
+    } else {
+      i++;  /* skip ESC */
+      if (!lj_char_isdigit(uchar(news[i]))) {
+	luaL_addchar(b, news[i]);
+      } else if (news[i] == '0') {
+	luaL_addlstring(b, s, (size_t)(e - s));
+      } else {
+	push_onecapture(ms, news[i] - '1', s, e);
+	luaL_addvalue(b);  /* add capture to accumulated result */
+      }
+    }
+  }
+}
+
+static void add_value(MatchState *ms, luaL_Buffer *b,
+		      const char *s, const char *e)
+{
+  lua_State *L = ms->L;
+  switch (lua_type(L, 3)) {
+    case LUA_TNUMBER:
+    case LUA_TSTRING: {
+      add_s(ms, b, s, e);
+      return;
+    }
+    case LUA_TFUNCTION: {
+      int n;
+      lua_pushvalue(L, 3);
+      n = push_captures(ms, s, e);
+      lua_call(L, n, 1);
+      break;
+    }
+    case LUA_TTABLE: {
+      push_onecapture(ms, 0, s, e);
+      lua_gettable(L, 3);
+      break;
+    }
+  }
+  if (!lua_toboolean(L, -1)) {  /* nil or false? */
+    lua_pop(L, 1);
+    lua_pushlstring(L, s, (size_t)(e - s));  /* keep original text */
+  } else if (!lua_isstring(L, -1)) {
+    lj_err_callerv(L, LJ_ERR_STRGSRV, luaL_typename(L, -1));
+  }
+  luaL_addvalue(b);  /* add result to accumulator */
+}
+
+LJLIB_CF(string_gsub)
+{
+  size_t srcl;
+  const char *src = luaL_checklstring(L, 1, &srcl);
+  const char *p = luaL_checkstring(L, 2);
+  int  tr = lua_type(L, 3);
+  int max_s = luaL_optint(L, 4, (int)(srcl+1));
+  int anchor = (*p == '^') ? (p++, 1) : 0;
+  int n = 0;
+  MatchState ms;
+  luaL_Buffer b;
+  if (!(tr == LUA_TNUMBER || tr == LUA_TSTRING ||
+	tr == LUA_TFUNCTION || tr == LUA_TTABLE))
+    lj_err_arg(L, 3, LJ_ERR_NOSFT);
+  luaL_buffinit(L, &b);
+  ms.L = L;
+  ms.src_init = src;
+  ms.src_end = src+srcl;
+  while (n < max_s) {
+    const char *e;
+    ms.level = ms.depth = 0;
+    e = match(&ms, src, p);
+    if (e) {
+      n++;
+      add_value(&ms, &b, src, e);
+    }
+    if (e && e>src) /* non empty match? */
+      src = e;  /* skip it */
+    else if (src < ms.src_end)
+      luaL_addchar(&b, *src++);
+    else
+      break;
+    if (anchor)
+      break;
+  }
+  luaL_addlstring(&b, src, (size_t)(ms.src_end-src));
+  luaL_pushresult(&b);
+  lua_pushinteger(L, n);  /* number of substitutions */
+  return 2;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
+#define MAX_FMTITEM	512
+/* valid flags in a format specification */
+#define FMT_FLAGS	"-+ #0"
+/*
+** maximum size of each format specification (such as '%-099.99d')
+** (+10 accounts for %99.99x plus margin of error)
+*/
+#define MAX_FMTSPEC	(sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
+
+static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
+{
+  GCstr *str = lj_lib_checkstr(L, arg);
+  int32_t len = (int32_t)str->len;
+  const char *s = strdata(str);
+  luaL_addchar(b, '"');
+  while (len--) {
+    uint32_t c = uchar(*s);
+    if (c == '"' || c == '\\' || c == '\n') {
+      luaL_addchar(b, '\\');
+    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
+      uint32_t d;
+      luaL_addchar(b, '\\');
+      if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
+	luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
+	goto tens;
+      } else if (c >= 10) {
+      tens:
+	d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
+      }
+      c += '0';
+    }
+    luaL_addchar(b, c);
+    s++;
+  }
+  luaL_addchar(b, '"');
+}
+
+static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
+{
+  const char *p = strfrmt;
+  while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++;  /* skip flags */
+  if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
+    lj_err_caller(L, LJ_ERR_STRFMTR);
+  if (lj_char_isdigit(uchar(*p))) p++;  /* skip width */
+  if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
+  if (*p == '.') {
+    p++;
+    if (lj_char_isdigit(uchar(*p))) p++;  /* skip precision */
+    if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
+  }
+  if (lj_char_isdigit(uchar(*p)))
+    lj_err_caller(L, LJ_ERR_STRFMTW);
+  *(form++) = '%';
+  strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
+  form += p - strfrmt + 1;
+  *form = '\0';
+  return p;
+}
+
+static void addintlen(char *form)
+{
+  size_t l = strlen(form);
+  char spec = form[l - 1];
+  strcpy(form + l - 1, LUA_INTFRMLEN);
+  form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
+  form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
+}
+
+static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
+{
+  if (sizeof(LUA_INTFRM_T) == 4) {
+    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
+  } else {
+    cTValue *o;
+    lj_lib_checknumber(L, arg);
+    o = L->base+arg-1;
+    if (tvisint(o))
+      return (LUA_INTFRM_T)intV(o);
+    else
+      return (LUA_INTFRM_T)numV(o);
+  }
+}
+
+static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
+{
+  if (sizeof(LUA_INTFRM_T) == 4) {
+    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
+  } else {
+    cTValue *o;
+    lj_lib_checknumber(L, arg);
+    o = L->base+arg-1;
+    if (tvisint(o))
+      return (unsigned LUA_INTFRM_T)intV(o);
+    else if ((int32_t)o->u32.hi < 0)
+      return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
+    else
+      return (unsigned LUA_INTFRM_T)numV(o);
+  }
+}
+
+static GCstr *meta_tostring(lua_State *L, int arg)
+{
+  TValue *o = L->base+arg-1;
+  cTValue *mo;
+  lua_assert(o < L->top);  /* Caller already checks for existence. */
+  if (LJ_LIKELY(tvisstr(o)))
+    return strV(o);
+  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+    copyTV(L, L->top++, mo);
+    copyTV(L, L->top++, o);
+    lua_call(L, 1, 1);
+    L->top--;
+    if (tvisstr(L->top))
+      return strV(L->top);
+    o = L->base+arg-1;
+    copyTV(L, o, L->top);
+  }
+  if (tvisnumber(o)) {
+    return lj_str_fromnumber(L, o);
+  } else if (tvisnil(o)) {
+    return lj_str_newlit(L, "nil");
+  } else if (tvisfalse(o)) {
+    return lj_str_newlit(L, "false");
+  } else if (tvistrue(o)) {
+    return lj_str_newlit(L, "true");
+  } else {
+    if (tvisfunc(o) && isffunc(funcV(o)))
+      lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
+    else
+      lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
+    L->top--;
+    return strV(L->top);
+  }
+}
+
+LJLIB_CF(string_format)
+{
+  int arg = 1, top = (int)(L->top - L->base);
+  GCstr *fmt = lj_lib_checkstr(L, arg);
+  const char *strfrmt = strdata(fmt);
+  const char *strfrmt_end = strfrmt + fmt->len;
+  luaL_Buffer b;
+  luaL_buffinit(L, &b);
+  while (strfrmt < strfrmt_end) {
+    if (*strfrmt != L_ESC) {
+      luaL_addchar(&b, *strfrmt++);
+    } else if (*++strfrmt == L_ESC) {
+      luaL_addchar(&b, *strfrmt++);  /* %% */
+    } else { /* format item */
+      char form[MAX_FMTSPEC];  /* to store the format (`%...') */
+      char buff[MAX_FMTITEM];  /* to store the formatted item */
+      if (++arg > top)
+	luaL_argerror(L, arg, lj_obj_typename[0]);
+      strfrmt = scanformat(L, strfrmt, form);
+      switch (*strfrmt++) {
+      case 'c':
+	sprintf(buff, form, lj_lib_checkint(L, arg));
+	break;
+      case 'd':  case 'i':
+	addintlen(form);
+	sprintf(buff, form, num2intfrm(L, arg));
+	break;
+      case 'o':  case 'u':  case 'x':  case 'X':
+	addintlen(form);
+	sprintf(buff, form, num2uintfrm(L, arg));
+	break;
+      case 'e':  case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
+	TValue tv;
+	tv.n = lj_lib_checknum(L, arg);
+	if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
+	  /* Canonicalize output of non-finite values. */
+	  char *p, nbuf[LJ_STR_NUMBUF];
+	  size_t len = lj_str_bufnum(nbuf, &tv);
+	  if (strfrmt[-1] < 'a') {
+	    nbuf[len-3] = nbuf[len-3] - 0x20;
+	    nbuf[len-2] = nbuf[len-2] - 0x20;
+	    nbuf[len-1] = nbuf[len-1] - 0x20;
+	  }
+	  nbuf[len] = '\0';
+	  for (p = form; *p < 'A' && *p != '.'; p++) ;
+	  *p++ = 's'; *p = '\0';
+	  sprintf(buff, form, nbuf);
+	  break;
+	}
+	sprintf(buff, form, (double)tv.n);
+	break;
+	}
+      case 'q':
+	addquoted(L, &b, arg);
+	continue;
+      case 'p':
+	lj_str_pushf(L, "%p", lua_topointer(L, arg));
+	luaL_addvalue(&b);
+	continue;
+      case 's': {
+	GCstr *str = meta_tostring(L, arg);
+	if (!strchr(form, '.') && str->len >= 100) {
+	  /* no precision and string is too long to be formatted;
+	     keep original string */
+	  setstrV(L, L->top++, str);
+	  luaL_addvalue(&b);
+	  continue;
+	}
+	sprintf(buff, form, strdata(str));
+	break;
+	}
+      default:
+	lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+	break;
+      }
+      luaL_addlstring(&b, buff, strlen(buff));
+    }
+  }
+  luaL_pushresult(&b);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_string(lua_State *L)
+{
+  GCtab *mt;
+  global_State *g;
+  LJ_LIB_REG(L, LUA_STRLIBNAME, string);
+#if defined(LUA_COMPAT_GFIND) && !LJ_52
+  lua_getfield(L, -1, "gmatch");
+  lua_setfield(L, -2, "gfind");
+#endif
+  mt = lj_tab_new(L, 0, 1);
+  /* NOBARRIER: basemt is a GC root. */
+  g = G(L);
+  setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
+  settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
+  mt->nomm = (uint8_t)(~(1u<<MM_index));
+  return 1;
+}
+

+ 300 - 0
luajit.mod/luajit/src/lib_table.c

@@ -0,0 +1,300 @@
+/*
+** Table library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_table_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_tab.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_table
+
+LJLIB_CF(table_foreachi)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  MSize i, n = lj_tab_len(t);
+  for (i = 1; i <= n; i++) {
+    cTValue *val;
+    setfuncV(L, L->top, func);
+    setintV(L->top+1, i);
+    val = lj_tab_getint(t, (int32_t)i);
+    if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
+    L->top += 3;
+    lua_call(L, 2, 1);
+    if (!tvisnil(L->top-1))
+      return 1;
+    L->top--;
+  }
+  return 0;
+}
+
+LJLIB_CF(table_foreach)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  L->top = L->base+3;
+  setnilV(L->top-1);
+  while (lj_tab_next(L, t, L->top-1)) {
+    copyTV(L, L->top+2, L->top);
+    copyTV(L, L->top+1, L->top-1);
+    setfuncV(L, L->top, func);
+    L->top += 3;
+    lua_call(L, 2, 1);
+    if (!tvisnil(L->top-1))
+      return 1;
+    L->top--;
+  }
+  return 0;
+}
+
+LJLIB_ASM(table_getn)		LJLIB_REC(.)
+{
+  lj_lib_checktab(L, 1);
+  return FFH_UNREACHABLE;
+}
+
+LJLIB_CF(table_maxn)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  TValue *array = tvref(t->array);
+  Node *node;
+  lua_Number m = 0;
+  ptrdiff_t i;
+  for (i = (ptrdiff_t)t->asize - 1; i >= 0; i--)
+    if (!tvisnil(&array[i])) {
+      m = (lua_Number)(int32_t)i;
+      break;
+    }
+  node = noderef(t->node);
+  for (i = (ptrdiff_t)t->hmask; i >= 0; i--)
+    if (!tvisnil(&node[i].val) && tvisnumber(&node[i].key)) {
+      lua_Number n = numberVnum(&node[i].key);
+      if (n > m) m = n;
+    }
+  setnumV(L->top-1, m);
+  return 1;
+}
+
+LJLIB_CF(table_insert)		LJLIB_REC(.)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  int32_t n, i = (int32_t)lj_tab_len(t) + 1;
+  int nargs = (int)((char *)L->top - (char *)L->base);
+  if (nargs != 2*sizeof(TValue)) {
+    if (nargs != 3*sizeof(TValue))
+      lj_err_caller(L, LJ_ERR_TABINS);
+    /* NOBARRIER: This just moves existing elements around. */
+    for (n = lj_lib_checkint(L, 2); i > n; i--) {
+      /* The set may invalidate the get pointer, so need to do it first! */
+      TValue *dst = lj_tab_setint(L, t, i);
+      cTValue *src = lj_tab_getint(t, i-1);
+      if (src) {
+	copyTV(L, dst, src);
+      } else {
+	setnilV(dst);
+      }
+    }
+    i = n;
+  }
+  {
+    TValue *dst = lj_tab_setint(L, t, i);
+    copyTV(L, dst, L->top-1);  /* Set new value. */
+    lj_gc_barriert(L, t, dst);
+  }
+  return 0;
+}
+
+LJLIB_CF(table_remove)		LJLIB_REC(.)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  int32_t e = (int32_t)lj_tab_len(t);
+  int32_t pos = lj_lib_optint(L, 2, e);
+  if (!(1 <= pos && pos <= e))  /* Nothing to remove? */
+    return 0;
+  lua_rawgeti(L, 1, pos);  /* Get previous value. */
+  /* NOBARRIER: This just moves existing elements around. */
+  for (; pos < e; pos++) {
+    cTValue *src = lj_tab_getint(t, pos+1);
+    TValue *dst = lj_tab_setint(L, t, pos);
+    if (src) {
+      copyTV(L, dst, src);
+    } else {
+      setnilV(dst);
+    }
+  }
+  setnilV(lj_tab_setint(L, t, e));  /* Remove (last) value. */
+  return 1;  /* Return previous value. */
+}
+
+LJLIB_CF(table_concat)
+{
+  luaL_Buffer b;
+  GCtab *t = lj_lib_checktab(L, 1);
+  GCstr *sep = lj_lib_optstr(L, 2);
+  MSize seplen = sep ? sep->len : 0;
+  int32_t i = lj_lib_optint(L, 3, 1);
+  int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
+	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
+  luaL_buffinit(L, &b);
+  if (i <= e) {
+    for (;;) {
+      cTValue *o;
+      lua_rawgeti(L, 1, i);
+      o = L->top-1;
+      if (!(tvisstr(o) || tvisnumber(o)))
+	lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
+      luaL_addvalue(&b);
+      if (i++ == e) break;
+      if (seplen)
+	luaL_addlstring(&b, strdata(sep), seplen);
+    }
+  }
+  luaL_pushresult(&b);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void set2(lua_State *L, int i, int j)
+{
+  lua_rawseti(L, 1, i);
+  lua_rawseti(L, 1, j);
+}
+
+static int sort_comp(lua_State *L, int a, int b)
+{
+  if (!lua_isnil(L, 2)) {  /* function? */
+    int res;
+    lua_pushvalue(L, 2);
+    lua_pushvalue(L, a-1);  /* -1 to compensate function */
+    lua_pushvalue(L, b-2);  /* -2 to compensate function and `a' */
+    lua_call(L, 2, 1);
+    res = lua_toboolean(L, -1);
+    lua_pop(L, 1);
+    return res;
+  } else {  /* a < b? */
+    return lua_lessthan(L, a, b);
+  }
+}
+
+static void auxsort(lua_State *L, int l, int u)
+{
+  while (l < u) {  /* for tail recursion */
+    int i, j;
+    /* sort elements a[l], a[(l+u)/2] and a[u] */
+    lua_rawgeti(L, 1, l);
+    lua_rawgeti(L, 1, u);
+    if (sort_comp(L, -1, -2))  /* a[u] < a[l]? */
+      set2(L, l, u);  /* swap a[l] - a[u] */
+    else
+      lua_pop(L, 2);
+    if (u-l == 1) break;  /* only 2 elements */
+    i = (l+u)/2;
+    lua_rawgeti(L, 1, i);
+    lua_rawgeti(L, 1, l);
+    if (sort_comp(L, -2, -1)) {  /* a[i]<a[l]? */
+      set2(L, i, l);
+    } else {
+      lua_pop(L, 1);  /* remove a[l] */
+      lua_rawgeti(L, 1, u);
+      if (sort_comp(L, -1, -2))  /* a[u]<a[i]? */
+	set2(L, i, u);
+      else
+	lua_pop(L, 2);
+    }
+    if (u-l == 2) break;  /* only 3 elements */
+    lua_rawgeti(L, 1, i);  /* Pivot */
+    lua_pushvalue(L, -1);
+    lua_rawgeti(L, 1, u-1);
+    set2(L, i, u-1);
+    /* a[l] <= P == a[u-1] <= a[u], only need to sort from l+1 to u-2 */
+    i = l; j = u-1;
+    for (;;) {  /* invariant: a[l..i] <= P <= a[j..u] */
+      /* repeat ++i until a[i] >= P */
+      while (lua_rawgeti(L, 1, ++i), sort_comp(L, -1, -2)) {
+	if (i>=u) lj_err_caller(L, LJ_ERR_TABSORT);
+	lua_pop(L, 1);  /* remove a[i] */
+      }
+      /* repeat --j until a[j] <= P */
+      while (lua_rawgeti(L, 1, --j), sort_comp(L, -3, -1)) {
+	if (j<=l) lj_err_caller(L, LJ_ERR_TABSORT);
+	lua_pop(L, 1);  /* remove a[j] */
+      }
+      if (j<i) {
+	lua_pop(L, 3);  /* pop pivot, a[i], a[j] */
+	break;
+      }
+      set2(L, i, j);
+    }
+    lua_rawgeti(L, 1, u-1);
+    lua_rawgeti(L, 1, i);
+    set2(L, u-1, i);  /* swap pivot (a[u-1]) with a[i] */
+    /* a[l..i-1] <= a[i] == P <= a[i+1..u] */
+    /* adjust so that smaller half is in [j..i] and larger one in [l..u] */
+    if (i-l < u-i) {
+      j=l; i=i-1; l=i+2;
+    } else {
+      j=i+1; i=u; u=j-2;
+    }
+    auxsort(L, j, i);  /* call recursively the smaller one */
+  }  /* repeat the routine for the larger one */
+}
+
+LJLIB_CF(table_sort)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  int32_t n = (int32_t)lj_tab_len(t);
+  lua_settop(L, 2);
+  if (!tvisnil(L->base+1))
+    lj_lib_checkfunc(L, 2);
+  auxsort(L, 1, n);
+  return 0;
+}
+
+#if LJ_52
+LJLIB_PUSH("n")
+LJLIB_CF(table_pack)
+{
+  TValue *array, *base = L->base;
+  MSize i, n = (uint32_t)(L->top - base);
+  GCtab *t = lj_tab_new(L, n ? n+1 : 0, 1);
+  /* NOBARRIER: The table is new (marked white). */
+  setintV(lj_tab_setstr(L, t, strV(lj_lib_upvalue(L, 1))), (int32_t)n);
+  for (array = tvref(t->array) + 1, i = 0; i < n; i++)
+    copyTV(L, &array[i], &base[i]);
+  settabV(L, base, t);
+  L->top = base+1;
+  lj_gc_check(L);
+  return 1;
+}
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_table(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_TABLIBNAME, table);
+#if LJ_52
+  lua_getglobal(L, "unpack");
+  lua_setfield(L, -2, "unpack");
+#endif
+  return 1;
+}
+

+ 26 - 0
luajit.mod/luajit/src/lj.supp

@@ -0,0 +1,26 @@
+# Valgrind suppression file for LuaJIT 2.0.
+{
+   Optimized string compare
+   Memcheck:Addr4
+   fun:lj_str_cmp
+}
+{
+   Optimized string compare
+   Memcheck:Addr1
+   fun:lj_str_cmp
+}
+{
+   Optimized string compare
+   Memcheck:Addr4
+   fun:lj_str_new
+}
+{
+   Optimized string compare
+   Memcheck:Addr1
+   fun:lj_str_new
+}
+{
+   Optimized string compare
+   Memcheck:Cond
+   fun:lj_str_new
+}

+ 1396 - 0
luajit.mod/luajit/src/lj_alloc.c

@@ -0,0 +1,1396 @@
+/*
+** Bundled memory allocator.
+**
+** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc.
+** The original bears the following remark:
+**
+**   This is a version (aka dlmalloc) of malloc/free/realloc written by
+**   Doug Lea and released to the public domain, as explained at
+**   http://creativecommons.org/licenses/publicdomain.
+**
+**   * Version pre-2.8.4 Wed Mar 29 19:46:29 2006    (dl at gee)
+**
+** No additional copyright is claimed over the customizations.
+** Please do NOT bother the original author about this version here!
+**
+** If you want to use dlmalloc in another project, you should get
+** the original from: ftp://gee.cs.oswego.edu/pub/misc/
+** For thread-safe derivatives, take a look at:
+** - ptmalloc: http://www.malloc.de/
+** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
+*/
+
+#define lj_alloc_c
+#define LUA_CORE
+
+/* To get the mremap prototype. Must be defined before any system includes. */
+#if defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include "lj_def.h"
+#include "lj_arch.h"
+#include "lj_alloc.h"
+
+#ifndef LUAJIT_USE_SYSMALLOC
+
+#define MAX_SIZE_T		(~(size_t)0)
+#define MALLOC_ALIGNMENT	((size_t)8U)
+
+#define DEFAULT_GRANULARITY	((size_t)128U * (size_t)1024U)
+#define DEFAULT_TRIM_THRESHOLD	((size_t)2U * (size_t)1024U * (size_t)1024U)
+#define DEFAULT_MMAP_THRESHOLD	((size_t)128U * (size_t)1024U)
+#define MAX_RELEASE_CHECK_RATE	255
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE		(sizeof(size_t))
+#define SIZE_T_BITSIZE		(sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
+/* Annoying but necessary to avoid errors on some platforms */
+#define SIZE_T_ZERO		((size_t)0)
+#define SIZE_T_ONE		((size_t)1)
+#define SIZE_T_TWO		((size_t)2)
+#define TWO_SIZE_T_SIZES	(SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES	(SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES	(FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK	(MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
+
+/* -------------------------- MMAP support ------------------------------- */
+
+#define MFAIL			((void *)(MAX_SIZE_T))
+#define CMFAIL			((char *)(MFAIL)) /* defined for convenience */
+
+#define IS_DIRECT_BIT		(SIZE_T_ONE)
+
+#if LJ_TARGET_WINDOWS
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_64
+
+/* Undocumented, but hey, that's what we all love so much about Windows. */
+typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
+		       size_t *size, ULONG alloctype, ULONG prot);
+static PNTAVM ntavm;
+
+/* Number of top bits of the lower 32 bits of an address that must be zero.
+** Apparently 0 gives us full 64 bit addresses and 1 gives us the lower 2GB.
+*/
+#define NTAVM_ZEROBITS		1
+
+static void INIT_MMAP(void)
+{
+  ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
+				 "NtAllocateVirtualMemory");
+}
+
+/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
+static LJ_AINLINE void *CALL_MMAP(size_t size)
+{
+  DWORD olderr = GetLastError();
+  void *ptr = NULL;
+  long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
+		  MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  SetLastError(olderr);
+  return st == 0 ? ptr : MFAIL;
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+{
+  DWORD olderr = GetLastError();
+  void *ptr = NULL;
+  long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
+		  MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE);
+  SetLastError(olderr);
+  return st == 0 ? ptr : MFAIL;
+}
+
+#else
+
+#define INIT_MMAP()		((void)0)
+
+/* Win32 MMAP via VirtualAlloc */
+static LJ_AINLINE void *CALL_MMAP(size_t size)
+{
+  DWORD olderr = GetLastError();
+  void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  SetLastError(olderr);
+  return ptr ? ptr : MFAIL;
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+{
+  DWORD olderr = GetLastError();
+  void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+			   PAGE_READWRITE);
+  SetLastError(olderr);
+  return ptr ? ptr : MFAIL;
+}
+
+#endif
+
+/* This function supports releasing coalesed segments */
+static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+{
+  DWORD olderr = GetLastError();
+  MEMORY_BASIC_INFORMATION minfo;
+  char *cptr = (char *)ptr;
+  while (size) {
+    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
+      return -1;
+    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
+	minfo.State != MEM_COMMIT || minfo.RegionSize > size)
+      return -1;
+    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
+      return -1;
+    cptr += minfo.RegionSize;
+    size -= minfo.RegionSize;
+  }
+  SetLastError(olderr);
+  return 0;
+}
+
+#else
+
+#include <errno.h>
+#include <sys/mman.h>
+
+#define MMAP_PROT		(PROT_READ|PROT_WRITE)
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS		MAP_ANON
+#endif
+#define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
+
+#if LJ_64
+/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+
+#if defined(MAP_32BIT)
+
+#if defined(__sun__)
+#define MMAP_REGION_START	((uintptr_t)0x1000)
+#else
+/* Actually this only gives us max. 1GB in current Linux kernels. */
+#define MMAP_REGION_START	((uintptr_t)0)
+#endif
+
+static LJ_AINLINE void *CALL_MMAP(size_t size)
+{
+  int olderr = errno;
+  void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+  errno = olderr;
+  return ptr;
+}
+
+#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__)
+
+/* OSX and FreeBSD mmap() use a naive first-fit linear search.
+** That's perfect for us. Except that -pagezero_size must be set for OSX,
+** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
+** to be reduced to 250MB on FreeBSD.
+*/
+#if LJ_TARGET_OSX || defined(__DragonFly__)
+#define MMAP_REGION_START	((uintptr_t)0x10000)
+#elif LJ_TARGET_PS4
+#define MMAP_REGION_START	((uintptr_t)0x4000)
+#else
+#define MMAP_REGION_START	((uintptr_t)0x10000000)
+#endif
+#define MMAP_REGION_END		((uintptr_t)0x80000000)
+
+#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+#include <sys/resource.h>
+#endif
+
+static LJ_AINLINE void *CALL_MMAP(size_t size)
+{
+  int olderr = errno;
+  /* Hint for next allocation. Doesn't need to be thread-safe. */
+  static uintptr_t alloc_hint = MMAP_REGION_START;
+  int retry = 0;
+#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+  static int rlimit_modified = 0;
+  if (LJ_UNLIKELY(rlimit_modified == 0)) {
+    struct rlimit rlim;
+    rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
+    setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail below. */
+    rlimit_modified = 1;
+  }
+#endif
+  for (;;) {
+    void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
+    if ((uintptr_t)p >= MMAP_REGION_START &&
+	(uintptr_t)p + size < MMAP_REGION_END) {
+      alloc_hint = (uintptr_t)p + size;
+      errno = olderr;
+      return p;
+    }
+    if (p != CMFAIL) munmap(p, size);
+#if defined(__sun__) || defined(__DragonFly__)
+    alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
+    if (alloc_hint + size < MMAP_REGION_END) continue;
+#endif
+    if (retry) break;
+    retry = 1;
+    alloc_hint = MMAP_REGION_START;
+  }
+  errno = olderr;
+  return CMFAIL;
+}
+
+#else
+
+#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
+
+#endif
+
+#else
+
+/* 32 bit mode is easy. */
+static LJ_AINLINE void *CALL_MMAP(size_t size)
+{
+  int olderr = errno;
+  void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
+  errno = olderr;
+  return ptr;
+}
+
+#endif
+
+#define INIT_MMAP()		((void)0)
+#define DIRECT_MMAP(s)		CALL_MMAP(s)
+
+static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+{
+  int olderr = errno;
+  int ret = munmap(ptr, size);
+  errno = olderr;
+  return ret;
+}
+
+#if LJ_TARGET_LINUX
+/* Need to define _GNU_SOURCE to get the mremap prototype. */
+static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
+				     int flags)
+{
+  int olderr = errno;
+  ptr = mremap(ptr, osz, nsz, flags);
+  errno = olderr;
+  return ptr;
+}
+
+#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
+#define CALL_MREMAP_NOMOVE	0
+#define CALL_MREMAP_MAYMOVE	1
+#if LJ_64
+#define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
+#else
+#define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
+#endif
+#endif
+
+#endif
+
+#ifndef CALL_MREMAP
+#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
+#endif
+
+/* -----------------------  Chunk representations ------------------------ */
+
+struct malloc_chunk {
+  size_t               prev_foot;  /* Size of previous chunk (if free).  */
+  size_t               head;       /* Size and inuse bits. */
+  struct malloc_chunk *fd;         /* double links -- used only if free. */
+  struct malloc_chunk *bk;
+};
+
+typedef struct malloc_chunk  mchunk;
+typedef struct malloc_chunk *mchunkptr;
+typedef struct malloc_chunk *sbinptr;  /* The type of bins of chunks */
+typedef size_t bindex_t;               /* Described below */
+typedef unsigned int binmap_t;         /* Described below */
+typedef unsigned int flag_t;           /* The type of various bit flag sets */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+#define MCHUNK_SIZE		(sizeof(mchunk))
+
+#define CHUNK_OVERHEAD		(SIZE_T_SIZE)
+
+/* Direct chunks need a second word of overhead ... */
+#define DIRECT_CHUNK_OVERHEAD	(TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define DIRECT_FOOT_PAD		(FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+#define MIN_CHUNK_SIZE\
+  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk2mem(p)		((void *)((char *)(p) + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem)		((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
+/* chunk associated with aligned address A */
+#define align_as_chunk(A)	(mchunkptr)((A) + align_offset(chunk2mem(A)))
+
+/* Bounds on request (not chunk) sizes. */
+#define MAX_REQUEST		((~MIN_CHUNK_SIZE+1) << 2)
+#define MIN_REQUEST		(MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+#define pad_request(req) \
+   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+#define request2size(req) \
+  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+#define PINUSE_BIT		(SIZE_T_ONE)
+#define CINUSE_BIT		(SIZE_T_TWO)
+#define INUSE_BITS		(PINUSE_BIT|CINUSE_BIT)
+
+/* Head value for fenceposts */
+#define FENCEPOST_HEAD		(INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+#define cinuse(p)		((p)->head & CINUSE_BIT)
+#define pinuse(p)		((p)->head & PINUSE_BIT)
+#define chunksize(p)		((p)->head & ~(INUSE_BITS))
+
+#define clear_pinuse(p)		((p)->head &= ~PINUSE_BIT)
+#define clear_cinuse(p)		((p)->head &= ~CINUSE_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s)		((mchunkptr)(((char *)(p)) + (s)))
+#define chunk_minus_offset(p, s)	((mchunkptr)(((char *)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+#define next_chunk(p)	((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
+#define prev_chunk(p)	((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p)	((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+#define get_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot)
+#define set_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
+
+/* Set size, pinuse bit, and foot */
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+  ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+#define set_free_with_pinuse(p, s, n)\
+  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+#define is_direct(p)\
+  (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+struct malloc_tree_chunk {
+  /* The first four fields must be compatible with malloc_chunk */
+  size_t                    prev_foot;
+  size_t                    head;
+  struct malloc_tree_chunk *fd;
+  struct malloc_tree_chunk *bk;
+
+  struct malloc_tree_chunk *child[2];
+  struct malloc_tree_chunk *parent;
+  bindex_t                  index;
+};
+
+typedef struct malloc_tree_chunk  tchunk;
+typedef struct malloc_tree_chunk *tchunkptr;
+typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
+
+/* ----------------------------- Segments -------------------------------- */
+
+struct malloc_segment {
+  char        *base;             /* base address */
+  size_t       size;             /* allocated size */
+  struct malloc_segment *next;   /* ptr to next segment */
+};
+
+typedef struct malloc_segment  msegment;
+typedef struct malloc_segment *msegmentptr;
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/* Bin types, widths and sizes */
+#define NSMALLBINS		(32U)
+#define NTREEBINS		(32U)
+#define SMALLBIN_SHIFT		(3U)
+#define SMALLBIN_WIDTH		(SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT		(8U)
+#define MIN_LARGE_SIZE		(SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE		(MIN_LARGE_SIZE - SIZE_T_ONE)
+#define MAX_SMALL_REQUEST  (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+struct malloc_state {
+  binmap_t   smallmap;
+  binmap_t   treemap;
+  size_t     dvsize;
+  size_t     topsize;
+  mchunkptr  dv;
+  mchunkptr  top;
+  size_t     trim_check;
+  size_t     release_checks;
+  mchunkptr  smallbins[(NSMALLBINS+1)*2];
+  tbinptr    treebins[NTREEBINS];
+  msegment   seg;
+};
+
+typedef struct malloc_state *mstate;
+
+#define is_initialized(M)	((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* page-align a size */
+#define page_align(S)\
+ (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE))
+
+/* granularity-align a size */
+#define granularity_align(S)\
+  (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\
+   & ~(DEFAULT_GRANULARITY - SIZE_T_ONE))
+
+#if LJ_TARGET_WINDOWS
+#define mmap_align(S)	granularity_align(S)
+#else
+#define mmap_align(S)	page_align(S)
+#endif
+
+/*  True if segment S holds address A */
+#define segment_holds(S, A)\
+  ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+static msegmentptr segment_holding(mstate m, char *addr)
+{
+  msegmentptr sp = &m->seg;
+  for (;;) {
+    if (addr >= sp->base && addr < sp->base + sp->size)
+      return sp;
+    if ((sp = sp->next) == 0)
+      return 0;
+  }
+}
+
+/* Return true if segment contains a segment link */
+static int has_segment_link(mstate m, msegmentptr ss)
+{
+  msegmentptr sp = &m->seg;
+  for (;;) {
+    if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size)
+      return 1;
+    if ((sp = sp->next) == 0)
+      return 0;
+  }
+}
+
+/*
+  TOP_FOOT_SIZE is padding at the end of a segment, including space
+  that may be needed to place segment records and fenceposts when new
+  noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+  (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+#define is_small(s)		(((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+#define small_index(s)		((s)  >> SMALLBIN_SHIFT)
+#define small_index2size(i)	((i)  << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX		(small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+#define smallbin_at(M, i)	((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i)		(&((M)->treebins[i]))
+
+/* assign tree index for size S to variable I */
+#define compute_tree_index(S, I)\
+{\
+  unsigned int X = (unsigned int)(S >> TREEBIN_SHIFT);\
+  if (X == 0) {\
+    I = 0;\
+  } else if (X > 0xFFFF) {\
+    I = NTREEBINS-1;\
+  } else {\
+    unsigned int K = lj_fls(X);\
+    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+/* Bit representing maximum resolved size in a treebin at i */
+#define bit_for_tree_index(i) \
+   (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
+
+/* Shift placing maximum resolved bit in a treebin at i as sign bit */
+#define leftshift_for_tree_index(i) \
+   ((i == NTREEBINS-1)? 0 : \
+    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+#define minsize_for_tree_index(i) \
+   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
+   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i)		((binmap_t)(1) << (i))
+
+/* Mark/Clear bits with given index */
+#define mark_smallmap(M,i)	((M)->smallmap |=  idx2bit(i))
+#define clear_smallmap(M,i)	((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i)	((M)->smallmap &   idx2bit(i))
+
+#define mark_treemap(M,i)	((M)->treemap  |=  idx2bit(i))
+#define clear_treemap(M,i)	((M)->treemap  &= ~idx2bit(i))
+#define treemap_is_marked(M,i)	((M)->treemap  &   idx2bit(i))
+
+/* mask with all bits to left of least bit of x on */
+#define left_bits(x)		((x<<1) | (~(x<<1)+1))
+
+/* Set cinuse bit and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+  ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+  ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/* Link a free chunk into a smallbin  */
+#define insert_small_chunk(M, P, S) {\
+  bindex_t I = small_index(S);\
+  mchunkptr B = smallbin_at(M, I);\
+  mchunkptr F = B;\
+  if (!smallmap_is_marked(M, I))\
+    mark_smallmap(M, I);\
+  else\
+    F = B->fd;\
+  B->fd = P;\
+  F->bk = P;\
+  P->fd = F;\
+  P->bk = B;\
+}
+
+/* Unlink a chunk from a smallbin  */
+#define unlink_small_chunk(M, P, S) {\
+  mchunkptr F = P->fd;\
+  mchunkptr B = P->bk;\
+  bindex_t I = small_index(S);\
+  if (F == B) {\
+    clear_smallmap(M, I);\
+  } else {\
+    F->bk = B;\
+    B->fd = F;\
+  }\
+}
+
+/* Unlink the first chunk from a smallbin */
+#define unlink_first_small_chunk(M, B, P, I) {\
+  mchunkptr F = P->fd;\
+  if (B == F) {\
+    clear_smallmap(M, I);\
+  } else {\
+    B->fd = F;\
+    F->bk = B;\
+  }\
+}
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+  size_t DVS = M->dvsize;\
+  if (DVS != 0) {\
+    mchunkptr DV = M->dv;\
+    insert_small_chunk(M, DV, DVS);\
+  }\
+  M->dvsize = S;\
+  M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree */
+#define insert_large_chunk(M, X, S) {\
+  tbinptr *H;\
+  bindex_t I;\
+  compute_tree_index(S, I);\
+  H = treebin_at(M, I);\
+  X->index = I;\
+  X->child[0] = X->child[1] = 0;\
+  if (!treemap_is_marked(M, I)) {\
+    mark_treemap(M, I);\
+    *H = X;\
+    X->parent = (tchunkptr)H;\
+    X->fd = X->bk = X;\
+  } else {\
+    tchunkptr T = *H;\
+    size_t K = S << leftshift_for_tree_index(I);\
+    for (;;) {\
+      if (chunksize(T) != S) {\
+	tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+	K <<= 1;\
+	if (*C != 0) {\
+	  T = *C;\
+	} else {\
+	  *C = X;\
+	  X->parent = T;\
+	  X->fd = X->bk = X;\
+	  break;\
+	}\
+      } else {\
+	tchunkptr F = T->fd;\
+	T->fd = F->bk = X;\
+	X->fd = F;\
+	X->bk = T;\
+	X->parent = 0;\
+	break;\
+      }\
+    }\
+  }\
+}
+
+#define unlink_large_chunk(M, X) {\
+  tchunkptr XP = X->parent;\
+  tchunkptr R;\
+  if (X->bk != X) {\
+    tchunkptr F = X->fd;\
+    R = X->bk;\
+    F->bk = R;\
+    R->fd = F;\
+  } else {\
+    tchunkptr *RP;\
+    if (((R = *(RP = &(X->child[1]))) != 0) ||\
+	((R = *(RP = &(X->child[0]))) != 0)) {\
+      tchunkptr *CP;\
+      while ((*(CP = &(R->child[1])) != 0) ||\
+	     (*(CP = &(R->child[0])) != 0)) {\
+	R = *(RP = CP);\
+      }\
+      *RP = 0;\
+    }\
+  }\
+  if (XP != 0) {\
+    tbinptr *H = treebin_at(M, X->index);\
+    if (X == *H) {\
+      if ((*H = R) == 0) \
+	clear_treemap(M, X->index);\
+    } else {\
+      if (XP->child[0] == X) \
+	XP->child[0] = R;\
+      else \
+	XP->child[1] = R;\
+    }\
+    if (R != 0) {\
+      tchunkptr C0, C1;\
+      R->parent = XP;\
+      if ((C0 = X->child[0]) != 0) {\
+	R->child[0] = C0;\
+	C0->parent = R;\
+      }\
+      if ((C1 = X->child[1]) != 0) {\
+	R->child[1] = C1;\
+	C1->parent = R;\
+      }\
+    }\
+  }\
+}
+
+/* Relays to large vs small bin operations */
+
+#define insert_chunk(M, P, S)\
+  if (is_small(S)) { insert_small_chunk(M, P, S)\
+  } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+  if (is_small(S)) { unlink_small_chunk(M, P, S)\
+  } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+/* -----------------------  Direct-mmapping chunks ----------------------- */
+
+static void *direct_alloc(size_t nb)
+{
+  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  if (LJ_LIKELY(mmsize > nb)) {     /* Check for wrap around 0 */
+    char *mm = (char *)(DIRECT_MMAP(mmsize));
+    if (mm != CMFAIL) {
+      size_t offset = align_offset(chunk2mem(mm));
+      size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
+      mchunkptr p = (mchunkptr)(mm + offset);
+      p->prev_foot = offset | IS_DIRECT_BIT;
+      p->head = psize|CINUSE_BIT;
+      chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+      return chunk2mem(p);
+    }
+  }
+  return NULL;
+}
+
+static mchunkptr direct_resize(mchunkptr oldp, size_t nb)
+{
+  size_t oldsize = chunksize(oldp);
+  if (is_small(nb)) /* Can't shrink direct regions below small size */
+    return NULL;
+  /* Keep old chunk if big enough but not too big */
+  if (oldsize >= nb + SIZE_T_SIZE &&
+      (oldsize - nb) <= (DEFAULT_GRANULARITY >> 1)) {
+    return oldp;
+  } else {
+    size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT;
+    size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD;
+    size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+    char *cp = (char *)CALL_MREMAP((char *)oldp - offset,
+				   oldmmsize, newmmsize, CALL_MREMAP_MV);
+    if (cp != CMFAIL) {
+      mchunkptr newp = (mchunkptr)(cp + offset);
+      size_t psize = newmmsize - offset - DIRECT_FOOT_PAD;
+      newp->head = psize|CINUSE_BIT;
+      chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+      return newp;
+    }
+  }
+  return NULL;
+}
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Initialize top chunk and its size */
+static void init_top(mstate m, mchunkptr p, size_t psize)
+{
+  /* Ensure alignment */
+  size_t offset = align_offset(chunk2mem(p));
+  p = (mchunkptr)((char *)p + offset);
+  psize -= offset;
+
+  m->top = p;
+  m->topsize = psize;
+  p->head = psize | PINUSE_BIT;
+  /* set size of fake trailing chunk holding overhead space only once */
+  chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
+  m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */
+}
+
+/* Initialize bins for a new mstate that is otherwise zeroed out */
+static void init_bins(mstate m)
+{
+  /* Establish circular links for smallbins */
+  bindex_t i;
+  for (i = 0; i < NSMALLBINS; i++) {
+    sbinptr bin = smallbin_at(m,i);
+    bin->fd = bin->bk = bin;
+  }
+}
+
+/* Allocate chunk and prepend remainder with chunk in successor base. */
+static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb)
+{
+  mchunkptr p = align_as_chunk(newbase);
+  mchunkptr oldfirst = align_as_chunk(oldbase);
+  size_t psize = (size_t)((char *)oldfirst - (char *)p);
+  mchunkptr q = chunk_plus_offset(p, nb);
+  size_t qsize = psize - nb;
+  set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+  /* consolidate remainder with first chunk of old base */
+  if (oldfirst == m->top) {
+    size_t tsize = m->topsize += qsize;
+    m->top = q;
+    q->head = tsize | PINUSE_BIT;
+  } else if (oldfirst == m->dv) {
+    size_t dsize = m->dvsize += qsize;
+    m->dv = q;
+    set_size_and_pinuse_of_free_chunk(q, dsize);
+  } else {
+    if (!cinuse(oldfirst)) {
+      size_t nsize = chunksize(oldfirst);
+      unlink_chunk(m, oldfirst, nsize);
+      oldfirst = chunk_plus_offset(oldfirst, nsize);
+      qsize += nsize;
+    }
+    set_free_with_pinuse(q, qsize, oldfirst);
+    insert_chunk(m, q, qsize);
+  }
+
+  return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region */
+static void add_segment(mstate m, char *tbase, size_t tsize)
+{
+  /* Determine locations and sizes of segment, fenceposts, old top */
+  char *old_top = (char *)m->top;
+  msegmentptr oldsp = segment_holding(m, old_top);
+  char *old_end = oldsp->base + oldsp->size;
+  size_t ssize = pad_request(sizeof(struct malloc_segment));
+  char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  size_t offset = align_offset(chunk2mem(rawsp));
+  char *asp = rawsp + offset;
+  char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+  mchunkptr sp = (mchunkptr)csp;
+  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+  mchunkptr tnext = chunk_plus_offset(sp, ssize);
+  mchunkptr p = tnext;
+
+  /* reset top to new space */
+  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+  /* Set up segment record */
+  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+  *ss = m->seg; /* Push current record */
+  m->seg.base = tbase;
+  m->seg.size = tsize;
+  m->seg.next = ss;
+
+  /* Insert trailing fenceposts */
+  for (;;) {
+    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+    p->head = FENCEPOST_HEAD;
+    if ((char *)(&(nextp->head)) < old_end)
+      p = nextp;
+    else
+      break;
+  }
+
+  /* Insert the rest of old top into a bin as an ordinary free chunk */
+  if (csp != old_top) {
+    mchunkptr q = (mchunkptr)old_top;
+    size_t psize = (size_t)(csp - old_top);
+    mchunkptr tn = chunk_plus_offset(q, psize);
+    set_free_with_pinuse(q, psize, tn);
+    insert_chunk(m, q, psize);
+  }
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+static void *alloc_sys(mstate m, size_t nb)
+{
+  char *tbase = CMFAIL;
+  size_t tsize = 0;
+
+  /* Directly map large chunks */
+  if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
+    void *mem = direct_alloc(nb);
+    if (mem != 0)
+      return mem;
+  }
+
+  {
+    size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
+    size_t rsize = granularity_align(req);
+    if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
+      char *mp = (char *)(CALL_MMAP(rsize));
+      if (mp != CMFAIL) {
+	tbase = mp;
+	tsize = rsize;
+      }
+    }
+  }
+
+  if (tbase != CMFAIL) {
+    msegmentptr sp = &m->seg;
+    /* Try to merge with an existing segment */
+    while (sp != 0 && tbase != sp->base + sp->size)
+      sp = sp->next;
+    if (sp != 0 && segment_holds(sp, m->top)) { /* append */
+      sp->size += tsize;
+      init_top(m, m->top, m->topsize + tsize);
+    } else {
+      sp = &m->seg;
+      while (sp != 0 && sp->base != tbase + tsize)
+	sp = sp->next;
+      if (sp != 0) {
+	char *oldbase = sp->base;
+	sp->base = tbase;
+	sp->size += tsize;
+	return prepend_alloc(m, tbase, oldbase, nb);
+      } else {
+	add_segment(m, tbase, tsize);
+      }
+    }
+
+    if (nb < m->topsize) { /* Allocate from new or extended top space */
+      size_t rsize = m->topsize -= nb;
+      mchunkptr p = m->top;
+      mchunkptr r = m->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+      return chunk2mem(p);
+    }
+  }
+
+  return NULL;
+}
+
+/* -----------------------  system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m)
+{
+  size_t released = 0;
+  size_t nsegs = 0;
+  msegmentptr pred = &m->seg;
+  msegmentptr sp = pred->next;
+  while (sp != 0) {
+    char *base = sp->base;
+    size_t size = sp->size;
+    msegmentptr next = sp->next;
+    nsegs++;
+    {
+      mchunkptr p = align_as_chunk(base);
+      size_t psize = chunksize(p);
+      /* Can unmap if first chunk holds entire segment and not pinned */
+      if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
+	tchunkptr tp = (tchunkptr)p;
+	if (p == m->dv) {
+	  m->dv = 0;
+	  m->dvsize = 0;
+	} else {
+	  unlink_large_chunk(m, tp);
+	}
+	if (CALL_MUNMAP(base, size) == 0) {
+	  released += size;
+	  /* unlink obsoleted record */
+	  sp = pred;
+	  sp->next = next;
+	} else { /* back out if cannot unmap */
+	  insert_large_chunk(m, tp, psize);
+	}
+      }
+    }
+    pred = sp;
+    sp = next;
+  }
+  /* Reset check counter */
+  m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ?
+		      nsegs : MAX_RELEASE_CHECK_RATE;
+  return released;
+}
+
+static int alloc_trim(mstate m, size_t pad)
+{
+  size_t released = 0;
+  if (pad < MAX_REQUEST && is_initialized(m)) {
+    pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+    if (m->topsize > pad) {
+      /* Shrink top space in granularity-size units, keeping at least one */
+      size_t unit = DEFAULT_GRANULARITY;
+      size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+		      SIZE_T_ONE) * unit;
+      msegmentptr sp = segment_holding(m, (char *)m->top);
+
+      if (sp->size >= extra &&
+	  !has_segment_link(m, sp)) { /* can't shrink if pinned */
+	size_t newsize = sp->size - extra;
+	/* Prefer mremap, fall back to munmap */
+	if ((CALL_MREMAP(sp->base, sp->size, newsize, CALL_MREMAP_NOMOVE) != MFAIL) ||
+	    (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+	  released = extra;
+	}
+      }
+
+      if (released != 0) {
+	sp->size -= released;
+	init_top(m, m->top, m->topsize - released);
+      }
+    }
+
+    /* Unmap any unused mmapped segments */
+    released += release_unused_segments(m);
+
+    /* On failure, disable autotrim to avoid repeated failed future calls */
+    if (released == 0 && m->topsize > m->trim_check)
+      m->trim_check = MAX_SIZE_T;
+  }
+
+  return (released != 0)? 1 : 0;
+}
+
+/* ---------------------------- malloc support --------------------------- */
+
+/* allocate a large request from the best fitting chunk in a treebin */
+static void *tmalloc_large(mstate m, size_t nb)
+{
+  tchunkptr v = 0;
+  size_t rsize = ~nb+1; /* Unsigned negation */
+  tchunkptr t;
+  bindex_t idx;
+  compute_tree_index(nb, idx);
+
+  if ((t = *treebin_at(m, idx)) != 0) {
+    /* Traverse tree for this bin looking for node with size == nb */
+    size_t sizebits = nb << leftshift_for_tree_index(idx);
+    tchunkptr rst = 0;  /* The deepest untaken right subtree */
+    for (;;) {
+      tchunkptr rt;
+      size_t trem = chunksize(t) - nb;
+      if (trem < rsize) {
+	v = t;
+	if ((rsize = trem) == 0)
+	  break;
+      }
+      rt = t->child[1];
+      t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+      if (rt != 0 && rt != t)
+	rst = rt;
+      if (t == 0) {
+	t = rst; /* set t to least subtree holding sizes > nb */
+	break;
+      }
+      sizebits <<= 1;
+    }
+  }
+
+  if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+    if (leftbits != 0)
+      t = *treebin_at(m, lj_ffs(leftbits));
+  }
+
+  while (t != 0) { /* find smallest of tree or subtree */
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+    t = leftmost_child(t);
+  }
+
+  /*  If dv is a better fit, return NULL so malloc will use it */
+  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+    mchunkptr r = chunk_plus_offset(v, nb);
+    unlink_large_chunk(m, v);
+    if (rsize < MIN_CHUNK_SIZE) {
+      set_inuse_and_pinuse(m, v, (rsize + nb));
+    } else {
+      set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+      set_size_and_pinuse_of_free_chunk(r, rsize);
+      insert_chunk(m, r, rsize);
+    }
+    return chunk2mem(v);
+  }
+  return NULL;
+}
+
+/* allocate a small request from the best fitting chunk in a treebin */
+static void *tmalloc_small(mstate m, size_t nb)
+{
+  tchunkptr t, v;
+  mchunkptr r;
+  size_t rsize;
+  bindex_t i = lj_ffs(m->treemap);
+
+  v = t = *treebin_at(m, i);
+  rsize = chunksize(t) - nb;
+
+  while ((t = leftmost_child(t)) != 0) {
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+  }
+
+  r = chunk_plus_offset(v, nb);
+  unlink_large_chunk(m, v);
+  if (rsize < MIN_CHUNK_SIZE) {
+    set_inuse_and_pinuse(m, v, (rsize + nb));
+  } else {
+    set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+    set_size_and_pinuse_of_free_chunk(r, rsize);
+    replace_dv(m, r, rsize);
+  }
+  return chunk2mem(v);
+}
+
+/* ----------------------------------------------------------------------- */
+
+void *lj_alloc_create(void)
+{
+  size_t tsize = DEFAULT_GRANULARITY;
+  char *tbase;
+  INIT_MMAP();
+  tbase = (char *)(CALL_MMAP(tsize));
+  if (tbase != CMFAIL) {
+    size_t msize = pad_request(sizeof(struct malloc_state));
+    mchunkptr mn;
+    mchunkptr msp = align_as_chunk(tbase);
+    mstate m = (mstate)(chunk2mem(msp));
+    memset(m, 0, msize);
+    msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
+    m->seg.base = tbase;
+    m->seg.size = tsize;
+    m->release_checks = MAX_RELEASE_CHECK_RATE;
+    init_bins(m);
+    mn = next_chunk(mem2chunk(m));
+    init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
+    return m;
+  }
+  return NULL;
+}
+
+void lj_alloc_destroy(void *msp)
+{
+  mstate ms = (mstate)msp;
+  msegmentptr sp = &ms->seg;
+  while (sp != 0) {
+    char *base = sp->base;
+    size_t size = sp->size;
+    sp = sp->next;
+    CALL_MUNMAP(base, size);
+  }
+}
+
+static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
+{
+  mstate ms = (mstate)msp;
+  void *mem;
+  size_t nb;
+  if (nsize <= MAX_SMALL_REQUEST) {
+    bindex_t idx;
+    binmap_t smallbits;
+    nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
+    idx = small_index(nb);
+    smallbits = ms->smallmap >> idx;
+
+    if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+      mchunkptr b, p;
+      idx += ~smallbits & 1;       /* Uses next bin if idx empty */
+      b = smallbin_at(ms, idx);
+      p = b->fd;
+      unlink_first_small_chunk(ms, b, p, idx);
+      set_inuse_and_pinuse(ms, p, small_index2size(idx));
+      mem = chunk2mem(p);
+      return mem;
+    } else if (nb > ms->dvsize) {
+      if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+	mchunkptr b, p, r;
+	size_t rsize;
+	binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+	bindex_t i = lj_ffs(leftbits);
+	b = smallbin_at(ms, i);
+	p = b->fd;
+	unlink_first_small_chunk(ms, b, p, i);
+	rsize = small_index2size(i) - nb;
+	/* Fit here cannot be remainderless if 4byte sizes */
+	if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
+	  set_inuse_and_pinuse(ms, p, small_index2size(i));
+	} else {
+	  set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+	  r = chunk_plus_offset(p, nb);
+	  set_size_and_pinuse_of_free_chunk(r, rsize);
+	  replace_dv(ms, r, rsize);
+	}
+	mem = chunk2mem(p);
+	return mem;
+      } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+	return mem;
+      }
+    }
+  } else if (nsize >= MAX_REQUEST) {
+    nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+  } else {
+    nb = pad_request(nsize);
+    if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+      return mem;
+    }
+  }
+
+  if (nb <= ms->dvsize) {
+    size_t rsize = ms->dvsize - nb;
+    mchunkptr p = ms->dv;
+    if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+      mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+      ms->dvsize = rsize;
+      set_size_and_pinuse_of_free_chunk(r, rsize);
+      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+    } else { /* exhaust dv */
+      size_t dvs = ms->dvsize;
+      ms->dvsize = 0;
+      ms->dv = 0;
+      set_inuse_and_pinuse(ms, p, dvs);
+    }
+    mem = chunk2mem(p);
+    return mem;
+  } else if (nb < ms->topsize) { /* Split top */
+    size_t rsize = ms->topsize -= nb;
+    mchunkptr p = ms->top;
+    mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+    r->head = rsize | PINUSE_BIT;
+    set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+    mem = chunk2mem(p);
+    return mem;
+  }
+  return alloc_sys(ms, nb);
+}
+
+static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
+{
+  if (ptr != 0) {
+    mchunkptr p = mem2chunk(ptr);
+    mstate fm = (mstate)msp;
+    size_t psize = chunksize(p);
+    mchunkptr next = chunk_plus_offset(p, psize);
+    if (!pinuse(p)) {
+      size_t prevsize = p->prev_foot;
+      if ((prevsize & IS_DIRECT_BIT) != 0) {
+	prevsize &= ~IS_DIRECT_BIT;
+	psize += prevsize + DIRECT_FOOT_PAD;
+	CALL_MUNMAP((char *)p - prevsize, psize);
+	return NULL;
+      } else {
+	mchunkptr prev = chunk_minus_offset(p, prevsize);
+	psize += prevsize;
+	p = prev;
+	/* consolidate backward */
+	if (p != fm->dv) {
+	  unlink_chunk(fm, p, prevsize);
+	} else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+	  fm->dvsize = psize;
+	  set_free_with_pinuse(p, psize, next);
+	  return NULL;
+	}
+      }
+    }
+    if (!cinuse(next)) {  /* consolidate forward */
+      if (next == fm->top) {
+	size_t tsize = fm->topsize += psize;
+	fm->top = p;
+	p->head = tsize | PINUSE_BIT;
+	if (p == fm->dv) {
+	  fm->dv = 0;
+	  fm->dvsize = 0;
+	}
+	if (tsize > fm->trim_check)
+	  alloc_trim(fm, 0);
+	return NULL;
+      } else if (next == fm->dv) {
+	size_t dsize = fm->dvsize += psize;
+	fm->dv = p;
+	set_size_and_pinuse_of_free_chunk(p, dsize);
+	return NULL;
+      } else {
+	size_t nsize = chunksize(next);
+	psize += nsize;
+	unlink_chunk(fm, next, nsize);
+	set_size_and_pinuse_of_free_chunk(p, psize);
+	if (p == fm->dv) {
+	  fm->dvsize = psize;
+	  return NULL;
+	}
+      }
+    } else {
+      set_free_with_pinuse(p, psize, next);
+    }
+
+    if (is_small(psize)) {
+      insert_small_chunk(fm, p, psize);
+    } else {
+      tchunkptr tp = (tchunkptr)p;
+      insert_large_chunk(fm, tp, psize);
+      if (--fm->release_checks == 0)
+	release_unused_segments(fm);
+    }
+  }
+  return NULL;
+}
+
+static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
+{
+  if (nsize >= MAX_REQUEST) {
+    return NULL;
+  } else {
+    mstate m = (mstate)msp;
+    mchunkptr oldp = mem2chunk(ptr);
+    size_t oldsize = chunksize(oldp);
+    mchunkptr next = chunk_plus_offset(oldp, oldsize);
+    mchunkptr newp = 0;
+    size_t nb = request2size(nsize);
+
+    /* Try to either shrink or extend into top. Else malloc-copy-free */
+    if (is_direct(oldp)) {
+      newp = direct_resize(oldp, nb);  /* this may return NULL. */
+    } else if (oldsize >= nb) { /* already big enough */
+      size_t rsize = oldsize - nb;
+      newp = oldp;
+      if (rsize >= MIN_CHUNK_SIZE) {
+	mchunkptr rem = chunk_plus_offset(newp, nb);
+	set_inuse(m, newp, nb);
+	set_inuse(m, rem, rsize);
+	lj_alloc_free(m, chunk2mem(rem));
+      }
+    } else if (next == m->top && oldsize + m->topsize > nb) {
+      /* Expand into top */
+      size_t newsize = oldsize + m->topsize;
+      size_t newtopsize = newsize - nb;
+      mchunkptr newtop = chunk_plus_offset(oldp, nb);
+      set_inuse(m, oldp, nb);
+      newtop->head = newtopsize |PINUSE_BIT;
+      m->top = newtop;
+      m->topsize = newtopsize;
+      newp = oldp;
+    }
+
+    if (newp != 0) {
+      return chunk2mem(newp);
+    } else {
+      void *newmem = lj_alloc_malloc(m, nsize);
+      if (newmem != 0) {
+	size_t oc = oldsize - overhead_for(oldp);
+	memcpy(newmem, ptr, oc < nsize ? oc : nsize);
+	lj_alloc_free(m, ptr);
+      }
+      return newmem;
+    }
+  }
+}
+
+void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
+{
+  (void)osize;
+  if (nsize == 0) {
+    return lj_alloc_free(msp, ptr);
+  } else if (ptr == NULL) {
+    return lj_alloc_malloc(msp, nsize);
+  } else {
+    return lj_alloc_realloc(msp, ptr, nsize);
+  }
+}
+
+#endif

+ 17 - 0
luajit.mod/luajit/src/lj_alloc.h

@@ -0,0 +1,17 @@
+/*
+** Bundled memory allocator.
+** Donated to the public domain.
+*/
+
+#ifndef _LJ_ALLOC_H
+#define _LJ_ALLOC_H
+
+#include "lj_def.h"
+
+#ifndef LUAJIT_USE_SYSMALLOC
+LJ_FUNC void *lj_alloc_create(void);
+LJ_FUNC void lj_alloc_destroy(void *msp);
+LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
+#endif
+
+#endif

+ 1200 - 0
luajit.mod/luajit/src/lj_api.c

@@ -0,0 +1,1200 @@
+/*
+** Public Lua/C API.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_api_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_debug.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_udata.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_bc.h"
+#include "lj_frame.h"
+#include "lj_trace.h"
+#include "lj_vm.h"
+#include "lj_strscan.h"
+
+/* -- Common helper functions --------------------------------------------- */
+
+#define api_checknelems(L, n)		api_check(L, (n) <= (L->top - L->base))
+#define api_checkvalidindex(L, i)	api_check(L, (i) != niltv(L))
+
+static TValue *index2adr(lua_State *L, int idx)
+{
+  if (idx > 0) {
+    TValue *o = L->base + (idx - 1);
+    return o < L->top ? o : niltv(L);
+  } else if (idx > LUA_REGISTRYINDEX) {
+    api_check(L, idx != 0 && -idx <= L->top - L->base);
+    return L->top + idx;
+  } else if (idx == LUA_GLOBALSINDEX) {
+    TValue *o = &G(L)->tmptv;
+    settabV(L, o, tabref(L->env));
+    return o;
+  } else if (idx == LUA_REGISTRYINDEX) {
+    return registry(L);
+  } else {
+    GCfunc *fn = curr_func(L);
+    api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn));
+    if (idx == LUA_ENVIRONINDEX) {
+      TValue *o = &G(L)->tmptv;
+      settabV(L, o, tabref(fn->c.env));
+      return o;
+    } else {
+      idx = LUA_GLOBALSINDEX - idx;
+      return idx <= fn->c.nupvalues ? &fn->c.upvalue[idx-1] : niltv(L);
+    }
+  }
+}
+
+static TValue *stkindex2adr(lua_State *L, int idx)
+{
+  if (idx > 0) {
+    TValue *o = L->base + (idx - 1);
+    return o < L->top ? o : niltv(L);
+  } else {
+    api_check(L, idx != 0 && -idx <= L->top - L->base);
+    return L->top + idx;
+  }
+}
+
+static GCtab *getcurrenv(lua_State *L)
+{
+  GCfunc *fn = curr_func(L);
+  return fn->c.gct == ~LJ_TFUNC ? tabref(fn->c.env) : tabref(L->env);
+}
+
+/* -- Miscellaneous API functions ----------------------------------------- */
+
+LUA_API int lua_status(lua_State *L)
+{
+  return L->status;
+}
+
+LUA_API int lua_checkstack(lua_State *L, int size)
+{
+  if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
+    return 0;  /* Stack overflow. */
+  } else if (size > 0) {
+    lj_state_checkstack(L, (MSize)size);
+  }
+  return 1;
+}
+
+LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
+{
+  if (!lua_checkstack(L, size))
+    lj_err_callerv(L, LJ_ERR_STKOVM, msg);
+}
+
+LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
+{
+  TValue *f, *t;
+  if (from == to) return;
+  api_checknelems(from, n);
+  api_check(from, G(from) == G(to));
+  lj_state_checkstack(to, (MSize)n);
+  f = from->top;
+  t = to->top = to->top + n;
+  while (--n >= 0) copyTV(to, --t, --f);
+  from->top = f;
+}
+
+/* -- Stack manipulation -------------------------------------------------- */
+
+LUA_API int lua_gettop(lua_State *L)
+{
+  return (int)(L->top - L->base);
+}
+
+LUA_API void lua_settop(lua_State *L, int idx)
+{
+  if (idx >= 0) {
+    api_check(L, idx <= tvref(L->maxstack) - L->base);
+    if (L->base + idx > L->top) {
+      if (L->base + idx >= tvref(L->maxstack))
+	lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
+      do { setnilV(L->top++); } while (L->top < L->base + idx);
+    } else {
+      L->top = L->base + idx;
+    }
+  } else {
+    api_check(L, -(idx+1) <= (L->top - L->base));
+    L->top += idx+1;  /* Shrinks top (idx < 0). */
+  }
+}
+
+LUA_API void lua_remove(lua_State *L, int idx)
+{
+  TValue *p = stkindex2adr(L, idx);
+  api_checkvalidindex(L, p);
+  while (++p < L->top) copyTV(L, p-1, p);
+  L->top--;
+}
+
+LUA_API void lua_insert(lua_State *L, int idx)
+{
+  TValue *q, *p = stkindex2adr(L, idx);
+  api_checkvalidindex(L, p);
+  for (q = L->top; q > p; q--) copyTV(L, q, q-1);
+  copyTV(L, p, L->top);
+}
+
+LUA_API void lua_replace(lua_State *L, int idx)
+{
+  api_checknelems(L, 1);
+  if (idx == LUA_GLOBALSINDEX) {
+    api_check(L, tvistab(L->top-1));
+    /* NOBARRIER: A thread (i.e. L) is never black. */
+    setgcref(L->env, obj2gco(tabV(L->top-1)));
+  } else if (idx == LUA_ENVIRONINDEX) {
+    GCfunc *fn = curr_func(L);
+    if (fn->c.gct != ~LJ_TFUNC)
+      lj_err_msg(L, LJ_ERR_NOENV);
+    api_check(L, tvistab(L->top-1));
+    setgcref(fn->c.env, obj2gco(tabV(L->top-1)));
+    lj_gc_barrier(L, fn, L->top-1);
+  } else {
+    TValue *o = index2adr(L, idx);
+    api_checkvalidindex(L, o);
+    copyTV(L, o, L->top-1);
+    if (idx < LUA_GLOBALSINDEX)  /* Need a barrier for upvalues. */
+      lj_gc_barrier(L, curr_func(L), L->top-1);
+  }
+  L->top--;
+}
+
+LUA_API void lua_pushvalue(lua_State *L, int idx)
+{
+  copyTV(L, L->top, index2adr(L, idx));
+  incr_top(L);
+}
+
+/* -- Stack getters ------------------------------------------------------- */
+
+LUA_API int lua_type(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  if (tvisnumber(o)) {
+    return LUA_TNUMBER;
+#if LJ_64
+  } else if (tvislightud(o)) {
+    return LUA_TLIGHTUSERDATA;
+#endif
+  } else if (o == niltv(L)) {
+    return LUA_TNONE;
+  } else {  /* Magic internal/external tag conversion. ORDER LJ_T */
+    uint32_t t = ~itype(o);
+#if LJ_64
+    int tt = (int)((U64x(75a06,98042110) >> 4*t) & 15u);
+#else
+    int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u);
+#endif
+    lua_assert(tt != LUA_TNIL || tvisnil(o));
+    return tt;
+  }
+}
+
+LUALIB_API void luaL_checktype(lua_State *L, int idx, int tt)
+{
+  if (lua_type(L, idx) != tt)
+    lj_err_argt(L, idx, tt);
+}
+
+LUALIB_API void luaL_checkany(lua_State *L, int idx)
+{
+  if (index2adr(L, idx) == niltv(L))
+    lj_err_arg(L, idx, LJ_ERR_NOVAL);
+}
+
+LUA_API const char *lua_typename(lua_State *L, int t)
+{
+  UNUSED(L);
+  return lj_obj_typename[t+1];
+}
+
+LUA_API int lua_iscfunction(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  return tvisfunc(o) && !isluafunc(funcV(o));
+}
+
+LUA_API int lua_isnumber(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  return (tvisnumber(o) || (tvisstr(o) && lj_strscan_number(strV(o), &tmp)));
+}
+
+LUA_API int lua_isstring(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  return (tvisstr(o) || tvisnumber(o));
+}
+
+LUA_API int lua_isuserdata(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  return (tvisudata(o) || tvislightud(o));
+}
+
+LUA_API int lua_rawequal(lua_State *L, int idx1, int idx2)
+{
+  cTValue *o1 = index2adr(L, idx1);
+  cTValue *o2 = index2adr(L, idx2);
+  return (o1 == niltv(L) || o2 == niltv(L)) ? 0 : lj_obj_equal(o1, o2);
+}
+
+LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
+{
+  cTValue *o1 = index2adr(L, idx1);
+  cTValue *o2 = index2adr(L, idx2);
+  if (tvisint(o1) && tvisint(o2)) {
+    return intV(o1) == intV(o2);
+  } else if (tvisnumber(o1) && tvisnumber(o2)) {
+    return numberVnum(o1) == numberVnum(o2);
+  } else if (itype(o1) != itype(o2)) {
+    return 0;
+  } else if (tvispri(o1)) {
+    return o1 != niltv(L) && o2 != niltv(L);
+#if LJ_64
+  } else if (tvislightud(o1)) {
+    return o1->u64 == o2->u64;
+#endif
+  } else if (gcrefeq(o1->gcr, o2->gcr)) {
+    return 1;
+  } else if (!tvistabud(o1)) {
+    return 0;
+  } else {
+    TValue *base = lj_meta_equal(L, gcV(o1), gcV(o2), 0);
+    if ((uintptr_t)base <= 1) {
+      return (int)(uintptr_t)base;
+    } else {
+      L->top = base+2;
+      lj_vm_call(L, base, 1+1);
+      L->top -= 2;
+      return tvistruecond(L->top+1);
+    }
+  }
+}
+
+LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
+{
+  cTValue *o1 = index2adr(L, idx1);
+  cTValue *o2 = index2adr(L, idx2);
+  if (o1 == niltv(L) || o2 == niltv(L)) {
+    return 0;
+  } else if (tvisint(o1) && tvisint(o2)) {
+    return intV(o1) < intV(o2);
+  } else if (tvisnumber(o1) && tvisnumber(o2)) {
+    return numberVnum(o1) < numberVnum(o2);
+  } else {
+    TValue *base = lj_meta_comp(L, o1, o2, 0);
+    if ((uintptr_t)base <= 1) {
+      return (int)(uintptr_t)base;
+    } else {
+      L->top = base+2;
+      lj_vm_call(L, base, 1+1);
+      L->top -= 2;
+      return tvistruecond(L->top+1);
+    }
+  }
+}
+
+LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  if (LJ_LIKELY(tvisnumber(o)))
+    return numberVnum(o);
+  else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp))
+    return numV(&tmp);
+  else
+    return 0;
+}
+
+LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  if (LJ_LIKELY(tvisnumber(o)))
+    return numberVnum(o);
+  else if (!(tvisstr(o) && lj_strscan_num(strV(o), &tmp)))
+    lj_err_argt(L, idx, LUA_TNUMBER);
+  return numV(&tmp);
+}
+
+LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  if (LJ_LIKELY(tvisnumber(o)))
+    return numberVnum(o);
+  else if (tvisnil(o))
+    return def;
+  else if (!(tvisstr(o) && lj_strscan_num(strV(o), &tmp)))
+    lj_err_argt(L, idx, LUA_TNUMBER);
+  return numV(&tmp);
+}
+
+LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  lua_Number n;
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else if (LJ_LIKELY(tvisnum(o))) {
+    n = numV(o);
+  } else {
+    if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
+      return 0;
+    if (tvisint(&tmp))
+      return (lua_Integer)intV(&tmp);
+    n = numV(&tmp);
+  }
+#if LJ_64
+  return (lua_Integer)n;
+#else
+  return lj_num2int(n);
+#endif
+}
+
+LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  lua_Number n;
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else if (LJ_LIKELY(tvisnum(o))) {
+    n = numV(o);
+  } else {
+    if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
+      lj_err_argt(L, idx, LUA_TNUMBER);
+    if (tvisint(&tmp))
+      return (lua_Integer)intV(&tmp);
+    n = numV(&tmp);
+  }
+#if LJ_64
+  return (lua_Integer)n;
+#else
+  return lj_num2int(n);
+#endif
+}
+
+LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
+{
+  cTValue *o = index2adr(L, idx);
+  TValue tmp;
+  lua_Number n;
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else if (LJ_LIKELY(tvisnum(o))) {
+    n = numV(o);
+  } else if (tvisnil(o)) {
+    return def;
+  } else {
+    if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
+      lj_err_argt(L, idx, LUA_TNUMBER);
+    if (tvisint(&tmp))
+      return (lua_Integer)intV(&tmp);
+    n = numV(&tmp);
+  }
+#if LJ_64
+  return (lua_Integer)n;
+#else
+  return lj_num2int(n);
+#endif
+}
+
+LUA_API int lua_toboolean(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  return tvistruecond(o);
+}
+
+LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
+{
+  TValue *o = index2adr(L, idx);
+  GCstr *s;
+  if (LJ_LIKELY(tvisstr(o))) {
+    s = strV(o);
+  } else if (tvisnumber(o)) {
+    lj_gc_check(L);
+    o = index2adr(L, idx);  /* GC may move the stack. */
+    s = lj_str_fromnumber(L, o);
+    setstrV(L, o, s);
+  } else {
+    if (len != NULL) *len = 0;
+    return NULL;
+  }
+  if (len != NULL) *len = s->len;
+  return strdata(s);
+}
+
+LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
+{
+  TValue *o = index2adr(L, idx);
+  GCstr *s;
+  if (LJ_LIKELY(tvisstr(o))) {
+    s = strV(o);
+  } else if (tvisnumber(o)) {
+    lj_gc_check(L);
+    o = index2adr(L, idx);  /* GC may move the stack. */
+    s = lj_str_fromnumber(L, o);
+    setstrV(L, o, s);
+  } else {
+    lj_err_argt(L, idx, LUA_TSTRING);
+  }
+  if (len != NULL) *len = s->len;
+  return strdata(s);
+}
+
+LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
+				       const char *def, size_t *len)
+{
+  TValue *o = index2adr(L, idx);
+  GCstr *s;
+  if (LJ_LIKELY(tvisstr(o))) {
+    s = strV(o);
+  } else if (tvisnil(o)) {
+    if (len != NULL) *len = def ? strlen(def) : 0;
+    return def;
+  } else if (tvisnumber(o)) {
+    lj_gc_check(L);
+    o = index2adr(L, idx);  /* GC may move the stack. */
+    s = lj_str_fromnumber(L, o);
+    setstrV(L, o, s);
+  } else {
+    lj_err_argt(L, idx, LUA_TSTRING);
+  }
+  if (len != NULL) *len = s->len;
+  return strdata(s);
+}
+
+LUALIB_API int luaL_checkoption(lua_State *L, int idx, const char *def,
+				const char *const lst[])
+{
+  ptrdiff_t i;
+  const char *s = lua_tolstring(L, idx, NULL);
+  if (s == NULL && (s = def) == NULL)
+    lj_err_argt(L, idx, LUA_TSTRING);
+  for (i = 0; lst[i]; i++)
+    if (strcmp(lst[i], s) == 0)
+      return (int)i;
+  lj_err_argv(L, idx, LJ_ERR_INVOPTM, s);
+}
+
+LUA_API size_t lua_objlen(lua_State *L, int idx)
+{
+  TValue *o = index2adr(L, idx);
+  if (tvisstr(o)) {
+    return strV(o)->len;
+  } else if (tvistab(o)) {
+    return (size_t)lj_tab_len(tabV(o));
+  } else if (tvisudata(o)) {
+    return udataV(o)->len;
+  } else if (tvisnumber(o)) {
+    GCstr *s = lj_str_fromnumber(L, o);
+    setstrV(L, o, s);
+    return s->len;
+  } else {
+    return 0;
+  }
+}
+
+LUA_API lua_CFunction lua_tocfunction(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  if (tvisfunc(o)) {
+    BCOp op = bc_op(*mref(funcV(o)->c.pc, BCIns));
+    if (op == BC_FUNCC || op == BC_FUNCCW)
+      return funcV(o)->c.f;
+  }
+  return NULL;
+}
+
+LUA_API void *lua_touserdata(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  if (tvisudata(o))
+    return uddata(udataV(o));
+  else if (tvislightud(o))
+    return lightudV(o);
+  else
+    return NULL;
+}
+
+LUA_API lua_State *lua_tothread(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  return (!tvisthread(o)) ? NULL : threadV(o);
+}
+
+LUA_API const void *lua_topointer(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  if (tvisudata(o))
+    return uddata(udataV(o));
+  else if (tvislightud(o))
+    return lightudV(o);
+  else if (tviscdata(o))
+    return cdataptr(cdataV(o));
+  else if (tvisgcv(o))
+    return gcV(o);
+  else
+    return NULL;
+}
+
+/* -- Stack setters (object creation) ------------------------------------- */
+
+LUA_API void lua_pushnil(lua_State *L)
+{
+  setnilV(L->top);
+  incr_top(L);
+}
+
+LUA_API void lua_pushnumber(lua_State *L, lua_Number n)
+{
+  setnumV(L->top, n);
+  if (LJ_UNLIKELY(tvisnan(L->top)))
+    setnanV(L->top);  /* Canonicalize injected NaNs. */
+  incr_top(L);
+}
+
+LUA_API void lua_pushinteger(lua_State *L, lua_Integer n)
+{
+  setintptrV(L->top, n);
+  incr_top(L);
+}
+
+LUA_API void lua_pushlstring(lua_State *L, const char *str, size_t len)
+{
+  GCstr *s;
+  lj_gc_check(L);
+  s = lj_str_new(L, str, len);
+  setstrV(L, L->top, s);
+  incr_top(L);
+}
+
+LUA_API void lua_pushstring(lua_State *L, const char *str)
+{
+  if (str == NULL) {
+    setnilV(L->top);
+  } else {
+    GCstr *s;
+    lj_gc_check(L);
+    s = lj_str_newz(L, str);
+    setstrV(L, L->top, s);
+  }
+  incr_top(L);
+}
+
+LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
+				     va_list argp)
+{
+  lj_gc_check(L);
+  return lj_str_pushvf(L, fmt, argp);
+}
+
+LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
+{
+  const char *ret;
+  va_list argp;
+  lj_gc_check(L);
+  va_start(argp, fmt);
+  ret = lj_str_pushvf(L, fmt, argp);
+  va_end(argp);
+  return ret;
+}
+
+LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
+{
+  GCfunc *fn;
+  lj_gc_check(L);
+  api_checknelems(L, n);
+  fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
+  fn->c.f = f;
+  L->top -= n;
+  while (n--)
+    copyTV(L, &fn->c.upvalue[n], L->top+n);
+  setfuncV(L, L->top, fn);
+  lua_assert(iswhite(obj2gco(fn)));
+  incr_top(L);
+}
+
+LUA_API void lua_pushboolean(lua_State *L, int b)
+{
+  setboolV(L->top, (b != 0));
+  incr_top(L);
+}
+
+LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
+{
+  setlightudV(L->top, checklightudptr(L, p));
+  incr_top(L);
+}
+
+LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
+{
+  GCtab *t;
+  lj_gc_check(L);
+  t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
+  settabV(L, L->top, t);
+  incr_top(L);
+}
+
+LUALIB_API int luaL_newmetatable(lua_State *L, const char *tname)
+{
+  GCtab *regt = tabV(registry(L));
+  TValue *tv = lj_tab_setstr(L, regt, lj_str_newz(L, tname));
+  if (tvisnil(tv)) {
+    GCtab *mt = lj_tab_new(L, 0, 1);
+    settabV(L, tv, mt);
+    settabV(L, L->top++, mt);
+    lj_gc_anybarriert(L, regt);
+    return 1;
+  } else {
+    copyTV(L, L->top++, tv);
+    return 0;
+  }
+}
+
+LUA_API int lua_pushthread(lua_State *L)
+{
+  setthreadV(L, L->top, L);
+  incr_top(L);
+  return (mainthread(G(L)) == L);
+}
+
+LUA_API lua_State *lua_newthread(lua_State *L)
+{
+  lua_State *L1;
+  lj_gc_check(L);
+  L1 = lj_state_new(L);
+  setthreadV(L, L->top, L1);
+  incr_top(L);
+  return L1;
+}
+
+LUA_API void *lua_newuserdata(lua_State *L, size_t size)
+{
+  GCudata *ud;
+  lj_gc_check(L);
+  if (size > LJ_MAX_UDATA)
+    lj_err_msg(L, LJ_ERR_UDATAOV);
+  ud = lj_udata_new(L, (MSize)size, getcurrenv(L));
+  setudataV(L, L->top, ud);
+  incr_top(L);
+  return uddata(ud);
+}
+
+LUA_API void lua_concat(lua_State *L, int n)
+{
+  api_checknelems(L, n);
+  if (n >= 2) {
+    n--;
+    do {
+      TValue *top = lj_meta_cat(L, L->top-1, -n);
+      if (top == NULL) {
+	L->top -= n;
+	break;
+      }
+      n -= (int)(L->top - top);
+      L->top = top+2;
+      lj_vm_call(L, top, 1+1);
+      L->top--;
+      copyTV(L, L->top-1, L->top);
+    } while (--n > 0);
+  } else if (n == 0) {  /* Push empty string. */
+    setstrV(L, L->top, &G(L)->strempty);
+    incr_top(L);
+  }
+  /* else n == 1: nothing to do. */
+}
+
+/* -- Object getters ------------------------------------------------------ */
+
+LUA_API void lua_gettable(lua_State *L, int idx)
+{
+  cTValue *v, *t = index2adr(L, idx);
+  api_checkvalidindex(L, t);
+  v = lj_meta_tget(L, t, L->top-1);
+  if (v == NULL) {
+    L->top += 2;
+    lj_vm_call(L, L->top-2, 1+1);
+    L->top -= 2;
+    v = L->top+1;
+  }
+  copyTV(L, L->top-1, v);
+}
+
+LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
+{
+  cTValue *v, *t = index2adr(L, idx);
+  TValue key;
+  api_checkvalidindex(L, t);
+  setstrV(L, &key, lj_str_newz(L, k));
+  v = lj_meta_tget(L, t, &key);
+  if (v == NULL) {
+    L->top += 2;
+    lj_vm_call(L, L->top-2, 1+1);
+    L->top -= 2;
+    v = L->top+1;
+  }
+  copyTV(L, L->top, v);
+  incr_top(L);
+}
+
+LUA_API void lua_rawget(lua_State *L, int idx)
+{
+  cTValue *t = index2adr(L, idx);
+  api_check(L, tvistab(t));
+  copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
+}
+
+LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
+{
+  cTValue *v, *t = index2adr(L, idx);
+  api_check(L, tvistab(t));
+  v = lj_tab_getint(tabV(t), n);
+  if (v) {
+    copyTV(L, L->top, v);
+  } else {
+    setnilV(L->top);
+  }
+  incr_top(L);
+}
+
+LUA_API int lua_getmetatable(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  GCtab *mt = NULL;
+  if (tvistab(o))
+    mt = tabref(tabV(o)->metatable);
+  else if (tvisudata(o))
+    mt = tabref(udataV(o)->metatable);
+  else
+    mt = tabref(basemt_obj(G(L), o));
+  if (mt == NULL)
+    return 0;
+  settabV(L, L->top, mt);
+  incr_top(L);
+  return 1;
+}
+
+LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
+{
+  if (lua_getmetatable(L, idx)) {
+    cTValue *tv = lj_tab_getstr(tabV(L->top-1), lj_str_newz(L, field));
+    if (tv && !tvisnil(tv)) {
+      copyTV(L, L->top-1, tv);
+      return 1;
+    }
+    L->top--;
+  }
+  return 0;
+}
+
+LUA_API void lua_getfenv(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  api_checkvalidindex(L, o);
+  if (tvisfunc(o)) {
+    settabV(L, L->top, tabref(funcV(o)->c.env));
+  } else if (tvisudata(o)) {
+    settabV(L, L->top, tabref(udataV(o)->env));
+  } else if (tvisthread(o)) {
+    settabV(L, L->top, tabref(threadV(o)->env));
+  } else {
+    setnilV(L->top);
+  }
+  incr_top(L);
+}
+
+LUA_API int lua_next(lua_State *L, int idx)
+{
+  cTValue *t = index2adr(L, idx);
+  int more;
+  api_check(L, tvistab(t));
+  more = lj_tab_next(L, tabV(t), L->top-1);
+  if (more) {
+    incr_top(L);  /* Return new key and value slot. */
+  } else {  /* End of traversal. */
+    L->top--;  /* Remove key slot. */
+  }
+  return more;
+}
+
+LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n)
+{
+  TValue *val;
+  const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val);
+  if (name) {
+    copyTV(L, L->top, val);
+    incr_top(L);
+  }
+  return name;
+}
+
+LUA_API void *lua_upvalueid(lua_State *L, int idx, int n)
+{
+  GCfunc *fn = funcV(index2adr(L, idx));
+  n--;
+  api_check(L, (uint32_t)n < fn->l.nupvalues);
+  return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+			 (void *)&fn->c.upvalue[n];
+}
+
+LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
+{
+  GCfunc *fn1 = funcV(index2adr(L, idx1));
+  GCfunc *fn2 = funcV(index2adr(L, idx2));
+  n1--; n2--;
+  api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues);
+  api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues);
+  setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]);
+  lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
+}
+
+LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+{
+  cTValue *o = index2adr(L, idx);
+  if (tvisudata(o)) {
+    GCudata *ud = udataV(o);
+    cTValue *tv = lj_tab_getstr(tabV(registry(L)), lj_str_newz(L, tname));
+    if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
+      return uddata(ud);
+  }
+  lj_err_argtype(L, idx, tname);
+  return NULL;  /* unreachable */
+}
+
+/* -- Object setters ------------------------------------------------------ */
+
+LUA_API void lua_settable(lua_State *L, int idx)
+{
+  TValue *o;
+  cTValue *t = index2adr(L, idx);
+  api_checknelems(L, 2);
+  api_checkvalidindex(L, t);
+  o = lj_meta_tset(L, t, L->top-2);
+  if (o) {
+    /* NOBARRIER: lj_meta_tset ensures the table is not black. */
+    copyTV(L, o, L->top-1);
+    L->top -= 2;
+  } else {
+    L->top += 3;
+    copyTV(L, L->top-1, L->top-6);
+    lj_vm_call(L, L->top-3, 0+1);
+    L->top -= 3;
+  }
+}
+
+LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
+{
+  TValue *o;
+  TValue key;
+  cTValue *t = index2adr(L, idx);
+  api_checknelems(L, 1);
+  api_checkvalidindex(L, t);
+  setstrV(L, &key, lj_str_newz(L, k));
+  o = lj_meta_tset(L, t, &key);
+  if (o) {
+    L->top--;
+    /* NOBARRIER: lj_meta_tset ensures the table is not black. */
+    copyTV(L, o, L->top);
+  } else {
+    L->top += 3;
+    copyTV(L, L->top-1, L->top-6);
+    lj_vm_call(L, L->top-3, 0+1);
+    L->top -= 2;
+  }
+}
+
+LUA_API void lua_rawset(lua_State *L, int idx)
+{
+  GCtab *t = tabV(index2adr(L, idx));
+  TValue *dst, *key;
+  api_checknelems(L, 2);
+  key = L->top-2;
+  dst = lj_tab_set(L, t, key);
+  copyTV(L, dst, key+1);
+  lj_gc_anybarriert(L, t);
+  L->top = key;
+}
+
+LUA_API void lua_rawseti(lua_State *L, int idx, int n)
+{
+  GCtab *t = tabV(index2adr(L, idx));
+  TValue *dst, *src;
+  api_checknelems(L, 1);
+  dst = lj_tab_setint(L, t, n);
+  src = L->top-1;
+  copyTV(L, dst, src);
+  lj_gc_barriert(L, t, dst);
+  L->top = src;
+}
+
+LUA_API int lua_setmetatable(lua_State *L, int idx)
+{
+  global_State *g;
+  GCtab *mt;
+  cTValue *o = index2adr(L, idx);
+  api_checknelems(L, 1);
+  api_checkvalidindex(L, o);
+  if (tvisnil(L->top-1)) {
+    mt = NULL;
+  } else {
+    api_check(L, tvistab(L->top-1));
+    mt = tabV(L->top-1);
+  }
+  g = G(L);
+  if (tvistab(o)) {
+    setgcref(tabV(o)->metatable, obj2gco(mt));
+    if (mt)
+      lj_gc_objbarriert(L, tabV(o), mt);
+  } else if (tvisudata(o)) {
+    setgcref(udataV(o)->metatable, obj2gco(mt));
+    if (mt)
+      lj_gc_objbarrier(L, udataV(o), mt);
+  } else {
+    /* Flush cache, since traces specialize to basemt. But not during __gc. */
+    if (lj_trace_flushall(L))
+      lj_err_caller(L, LJ_ERR_NOGCMM);
+    if (tvisbool(o)) {
+      /* NOBARRIER: basemt is a GC root. */
+      setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));
+      setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt));
+    } else {
+      /* NOBARRIER: basemt is a GC root. */
+      setgcref(basemt_obj(g, o), obj2gco(mt));
+    }
+  }
+  L->top--;
+  return 1;
+}
+
+LUA_API int lua_setfenv(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  GCtab *t;
+  api_checknelems(L, 1);
+  api_checkvalidindex(L, o);
+  api_check(L, tvistab(L->top-1));
+  t = tabV(L->top-1);
+  if (tvisfunc(o)) {
+    setgcref(funcV(o)->c.env, obj2gco(t));
+  } else if (tvisudata(o)) {
+    setgcref(udataV(o)->env, obj2gco(t));
+  } else if (tvisthread(o)) {
+    setgcref(threadV(o)->env, obj2gco(t));
+  } else {
+    L->top--;
+    return 0;
+  }
+  lj_gc_objbarrier(L, gcV(o), t);
+  L->top--;
+  return 1;
+}
+
+LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
+{
+  cTValue *f = index2adr(L, idx);
+  TValue *val;
+  const char *name;
+  api_checknelems(L, 1);
+  name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val);
+  if (name) {
+    L->top--;
+    copyTV(L, val, L->top);
+    lj_gc_barrier(L, funcV(f), L->top);
+  }
+  return name;
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+LUA_API void lua_call(lua_State *L, int nargs, int nresults)
+{
+  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_checknelems(L, nargs+1);
+  lj_vm_call(L, L->top - nargs, nresults+1);
+}
+
+LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
+{
+  global_State *g = G(L);
+  uint8_t oldh = hook_save(g);
+  ptrdiff_t ef;
+  int status;
+  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  api_checknelems(L, nargs+1);
+  if (errfunc == 0) {
+    ef = 0;
+  } else {
+    cTValue *o = stkindex2adr(L, errfunc);
+    api_checkvalidindex(L, o);
+    ef = savestack(L, o);
+  }
+  status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+  if (status) hook_restore(g, oldh);
+  return status;
+}
+
+static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
+{
+  GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+  fn->c.f = func;
+  setfuncV(L, L->top, fn);
+  setlightudV(L->top+1, checklightudptr(L, ud));
+  cframe_nres(L->cframe) = 1+0;  /* Zero results. */
+  L->top += 2;
+  return L->top-1;  /* Now call the newly allocated C function. */
+}
+
+LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
+{
+  global_State *g = G(L);
+  uint8_t oldh = hook_save(g);
+  int status;
+  api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+  status = lj_vm_cpcall(L, func, ud, cpcall);
+  if (status) hook_restore(g, oldh);
+  return status;
+}
+
+LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
+{
+  if (luaL_getmetafield(L, idx, field)) {
+    TValue *base = L->top--;
+    copyTV(L, base, index2adr(L, idx));
+    L->top = base+1;
+    lj_vm_call(L, base, 1+1);
+    return 1;
+  }
+  return 0;
+}
+
+/* -- Coroutine yield and resume ------------------------------------------ */
+
+LUA_API int lua_yield(lua_State *L, int nresults)
+{
+  void *cf = L->cframe;
+  global_State *g = G(L);
+  if (cframe_canyield(cf)) {
+    cf = cframe_raw(cf);
+    if (!hook_active(g)) {  /* Regular yield: move results down if needed. */
+      cTValue *f = L->top - nresults;
+      if (f > L->base) {
+	TValue *t = L->base;
+	while (--nresults >= 0) copyTV(L, t++, f++);
+	L->top = t;
+      }
+      L->cframe = NULL;
+      L->status = LUA_YIELD;
+      return -1;
+    } else {  /* Yield from hook: add a pseudo-frame. */
+      TValue *top = L->top;
+      hook_leave(g);
+      top->u64 = cframe_multres(cf);
+      setcont(top+1, lj_cont_hook);
+      setframe_pc(top+1, cframe_pc(cf)-1);
+      setframe_gc(top+2, obj2gco(L));
+      setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
+      L->top = L->base = top+3;
+#if LJ_TARGET_X64
+      lj_err_throw(L, LUA_YIELD);
+#else
+      L->cframe = NULL;
+      L->status = LUA_YIELD;
+      lj_vm_unwind_c(cf, LUA_YIELD);
+#endif
+    }
+  }
+  lj_err_msg(L, LJ_ERR_CYIELD);
+  return 0;  /* unreachable */
+}
+
+LUA_API int lua_resume(lua_State *L, int nargs)
+{
+  if (L->cframe == NULL && L->status <= LUA_YIELD)
+    return lj_vm_resume(L, L->top - nargs, 0, 0);
+  L->top = L->base;
+  setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
+  incr_top(L);
+  return LUA_ERRRUN;
+}
+
+/* -- GC and memory management -------------------------------------------- */
+
+LUA_API int lua_gc(lua_State *L, int what, int data)
+{
+  global_State *g = G(L);
+  int res = 0;
+  switch (what) {
+  case LUA_GCSTOP:
+    g->gc.threshold = LJ_MAX_MEM;
+    break;
+  case LUA_GCRESTART:
+    g->gc.threshold = data == -1 ? (g->gc.total/100)*g->gc.pause : g->gc.total;
+    break;
+  case LUA_GCCOLLECT:
+    lj_gc_fullgc(L);
+    break;
+  case LUA_GCCOUNT:
+    res = (int)(g->gc.total >> 10);
+    break;
+  case LUA_GCCOUNTB:
+    res = (int)(g->gc.total & 0x3ff);
+    break;
+  case LUA_GCSTEP: {
+    MSize a = (MSize)data << 10;
+    g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
+    while (g->gc.total >= g->gc.threshold)
+      if (lj_gc_step(L) > 0) {
+	res = 1;
+	break;
+      }
+    break;
+  }
+  case LUA_GCSETPAUSE:
+    res = (int)(g->gc.pause);
+    g->gc.pause = (MSize)data;
+    break;
+  case LUA_GCSETSTEPMUL:
+    res = (int)(g->gc.stepmul);
+    g->gc.stepmul = (MSize)data;
+    break;
+  default:
+    res = -1;  /* Invalid option. */
+  }
+  return res;
+}
+
+LUA_API lua_Alloc lua_getallocf(lua_State *L, void **ud)
+{
+  global_State *g = G(L);
+  if (ud) *ud = g->allocd;
+  return g->allocf;
+}
+
+LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud)
+{
+  global_State *g = G(L);
+  g->allocd = ud;
+  g->allocf = f;
+}
+

+ 437 - 0
luajit.mod/luajit/src/lj_arch.h

@@ -0,0 +1,437 @@
+/*
+** Target architecture selection.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_ARCH_H
+#define _LJ_ARCH_H
+
+#include "lua.h"
+
+/* Target endianess. */
+#define LUAJIT_LE	0
+#define LUAJIT_BE	1
+
+/* Target architectures. */
+#define LUAJIT_ARCH_X86		1
+#define LUAJIT_ARCH_x86		1
+#define LUAJIT_ARCH_X64		2
+#define LUAJIT_ARCH_x64		2
+#define LUAJIT_ARCH_ARM		3
+#define LUAJIT_ARCH_arm		3
+#define LUAJIT_ARCH_PPC		4
+#define LUAJIT_ARCH_ppc		4
+#define LUAJIT_ARCH_PPCSPE	5
+#define LUAJIT_ARCH_ppcspe	5
+#define LUAJIT_ARCH_MIPS	6
+#define LUAJIT_ARCH_mips	6
+
+/* Target OS. */
+#define LUAJIT_OS_OTHER		0
+#define LUAJIT_OS_WINDOWS	1
+#define LUAJIT_OS_LINUX		2
+#define LUAJIT_OS_OSX		3
+#define LUAJIT_OS_BSD		4
+#define LUAJIT_OS_POSIX		5
+
+/* Select native target if no target defined. */
+#ifndef LUAJIT_TARGET
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+#define LUAJIT_TARGET	LUAJIT_ARCH_X86
+#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#define LUAJIT_TARGET	LUAJIT_ARCH_X64
+#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
+#define LUAJIT_TARGET	LUAJIT_ARCH_ARM
+#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
+#ifdef __NO_FPRS__
+#define LUAJIT_TARGET	LUAJIT_ARCH_PPCSPE
+#else
+#define LUAJIT_TARGET	LUAJIT_ARCH_PPC
+#endif
+#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
+#define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
+#else
+#error "No support for this architecture (yet)"
+#endif
+
+#endif
+
+/* Select native OS if no target OS defined. */
+#ifndef LUAJIT_OS
+
+#if defined(_WIN32) && !defined(_XBOX_VER)
+#define LUAJIT_OS	LUAJIT_OS_WINDOWS
+#elif defined(__linux__)
+#define LUAJIT_OS	LUAJIT_OS_LINUX
+#elif defined(__MACH__) && defined(__APPLE__)
+#define LUAJIT_OS	LUAJIT_OS_OSX
+#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+       defined(__NetBSD__) || defined(__OpenBSD__) || \
+       defined(__DragonFly__)) && !defined(__ORBIS__)
+#define LUAJIT_OS	LUAJIT_OS_BSD
+#elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
+#define LUAJIT_OS	LUAJIT_OS_POSIX
+#else
+#define LUAJIT_OS	LUAJIT_OS_OTHER
+#endif
+
+#endif
+
+/* Set target OS properties. */
+#if LUAJIT_OS == LUAJIT_OS_WINDOWS
+#define LJ_OS_NAME	"Windows"
+#elif LUAJIT_OS == LUAJIT_OS_LINUX
+#define LJ_OS_NAME	"Linux"
+#elif LUAJIT_OS == LUAJIT_OS_OSX
+#define LJ_OS_NAME	"OSX"
+#elif LUAJIT_OS == LUAJIT_OS_BSD
+#define LJ_OS_NAME	"BSD"
+#elif LUAJIT_OS == LUAJIT_OS_POSIX
+#define LJ_OS_NAME	"POSIX"
+#else
+#define LJ_OS_NAME	"Other"
+#endif
+
+#define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
+#define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
+#define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
+#define LJ_TARGET_IOS		(LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
+#define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
+
+#ifdef __CELLOS_LV2__
+#define LJ_TARGET_PS3		1
+#define LJ_TARGET_CONSOLE	1
+#endif
+
+#ifdef __ORBIS__
+#define LJ_TARGET_PS4		1
+#define LJ_TARGET_CONSOLE	1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
+#ifdef __psp2__
+#define LJ_TARGET_PSVITA	1
+#define LJ_TARGET_CONSOLE	1
+#endif
+
+#if _XBOX_VER >= 200
+#define LJ_TARGET_XBOX360	1
+#define LJ_TARGET_CONSOLE	1
+#endif
+
+#define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
+#define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
+#define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
+#define LJ_NUMMODE_DUAL_SINGLE	3	/* Default to dual-number mode. */
+
+/* Set target architecture properties. */
+#if LUAJIT_TARGET == LUAJIT_ARCH_X86
+
+#define LJ_ARCH_NAME		"x86"
+#define LJ_ARCH_BITS		32
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#if LJ_TARGET_WINDOWS || __CYGWIN__
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
+#endif
+#define LJ_TARGET_X86		1
+#define LJ_TARGET_X86ORX64	1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNALIGNED	1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_X64
+
+#define LJ_ARCH_NAME		"x64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#define LJ_ABI_WIN		LJ_TARGET_WINDOWS
+#define LJ_TARGET_X64		1
+#define LJ_TARGET_X86ORX64	1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	31	/* +-2^31 = +-2GB */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNALIGNED	1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
+
+#define LJ_ARCH_NAME		"arm"
+#define LJ_ARCH_BITS		32
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
+#define LJ_ARCH_HASFPU		0
+#endif
+#if !defined(LJ_ABI_SOFTFP) && !__ARM_PCS_VFP
+#define LJ_ABI_SOFTFP		1
+#endif
+#define LJ_ABI_EABI		1
+#define LJ_TARGET_ARM		1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
+#define LJ_TARGET_MASKSHIFT	0
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+
+#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#define LJ_ARCH_VERSION		80
+#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+#define LJ_ARCH_VERSION		70
+#elif __ARM_ARCH_6T2__
+#define LJ_ARCH_VERSION		61
+#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
+#define LJ_ARCH_VERSION		60
+#else
+#define LJ_ARCH_VERSION		50
+#endif
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
+
+#define LJ_ARCH_NAME		"ppc"
+#if _LP64
+#define LJ_ARCH_BITS		64
+#else
+#define LJ_ARCH_BITS		32
+#endif
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#define LJ_TARGET_PPC		1
+#define LJ_TARGET_EHRETREG	3
+#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
+#define LJ_TARGET_MASKSHIFT	0
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
+
+#if _ARCH_PWR7
+#define LJ_ARCH_VERSION		70
+#elif _ARCH_PWR6
+#define LJ_ARCH_VERSION		60
+#elif _ARCH_PWR5X
+#define LJ_ARCH_VERSION		51
+#elif _ARCH_PWR5
+#define LJ_ARCH_VERSION		50
+#elif _ARCH_PWR4
+#define LJ_ARCH_VERSION		40
+#else
+#define LJ_ARCH_VERSION		0
+#endif
+#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC64		1
+#define LJ_ARCH_NOFFI		1
+#endif
+#if _ARCH_PPCSQ
+#define LJ_ARCH_SQRT		1
+#endif
+#if _ARCH_PWR5X
+#define LJ_ARCH_ROUND		1
+#endif
+#if __PPU__
+#define LJ_ARCH_CELL		1
+#endif
+#if LJ_TARGET_XBOX360
+#define LJ_ARCH_XENON		1
+#endif
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
+
+#define LJ_ARCH_NAME		"ppcspe"
+#define LJ_ARCH_BITS		32
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#ifndef LJ_ABI_SOFTFP
+#define LJ_ABI_SOFTFP		1
+#endif
+#define LJ_ABI_EABI		1
+#define LJ_TARGET_PPCSPE	1
+#define LJ_TARGET_EHRETREG	3
+#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
+#define LJ_TARGET_MASKSHIFT	0
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
+#define LJ_ARCH_NOFFI		1	/* NYI: comparisons, calls. */
+#define LJ_ARCH_NOJIT		1
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
+
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
+#define LJ_ARCH_NAME		"mipsel"
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#else
+#define LJ_ARCH_NAME		"mips"
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+#define LJ_ARCH_BITS		32
+#define LJ_TARGET_MIPS		1
+#define LJ_TARGET_EHRETREG	4
+#define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
+
+#if _MIPS_ARCH_MIPS32R2
+#define LJ_ARCH_VERSION		20
+#else
+#define LJ_ARCH_VERSION		10
+#endif
+
+#else
+#error "No target architecture defined"
+#endif
+
+#ifndef LJ_PAGESIZE
+#define LJ_PAGESIZE		4096
+#endif
+
+/* Check for minimum required compiler versions. */
+#if defined(__GNUC__)
+#if LJ_TARGET_X86
+#if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4)
+#error "Need at least GCC 3.4 or newer"
+#endif
+#elif LJ_TARGET_X64
+#if __GNUC__ < 4
+#error "Need at least GCC 4.0 or newer"
+#endif
+#elif LJ_TARGET_ARM
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+#error "Need at least GCC 4.2 or newer"
+#endif
+#elif !LJ_TARGET_PS3
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
+#error "Need at least GCC 4.3 or newer"
+#endif
+#endif
+#endif
+
+/* Check target-specific constraints. */
+#ifndef _BUILDVM_H
+#if LJ_TARGET_X64
+#if __USING_SJLJ_EXCEPTIONS__
+#error "Need a C compiler with native exception handling on x64"
+#endif
+#elif LJ_TARGET_ARM
+#if defined(__ARMEB__)
+#error "No support for big-endian ARM"
+#endif
+#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
+#error "No support for Cortex-M CPUs"
+#endif
+#if !(__ARM_EABI__ || LJ_TARGET_IOS)
+#error "Only ARM EABI or iOS 3.0+ ABI is supported"
+#endif
+#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#error "No support for PowerPC CPUs without double-precision FPU"
+#endif
+#if defined(_LITTLE_ENDIAN)
+#error "No support for little-endian PowerPC"
+#endif
+#if defined(_LP64)
+#error "No support for PowerPC 64 bit mode"
+#endif
+#elif LJ_TARGET_MIPS
+#if defined(__mips_soft_float)
+#error "No support for MIPS CPUs without FPU"
+#endif
+#if defined(_LP64)
+#error "No support for MIPS64"
+#endif
+#endif
+#endif
+
+/* Enable or disable the dual-number mode for the VM. */
+#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \
+    (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1)
+#error "No support for this number mode on this architecture"
+#endif
+#if LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL || \
+    (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL_SINGLE && LUAJIT_NUMMODE != 1) || \
+    (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE_DUAL && LUAJIT_NUMMODE == 2)
+#define LJ_DUALNUM		1
+#else
+#define LJ_DUALNUM		0
+#endif
+
+#if LJ_TARGET_IOS || LJ_TARGET_CONSOLE
+/* Runtime code generation is restricted on iOS. Complain to Apple, not me. */
+/* Ditto for the consoles. Complain to Sony or MS, not me. */
+#ifndef LUAJIT_ENABLE_JIT
+#define LJ_OS_NOJIT		1
+#endif
+#endif
+
+/* Disable or enable the JIT compiler. */
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
+#define LJ_HASJIT		0
+#else
+#define LJ_HASJIT		1
+#endif
+
+/* Disable or enable the FFI extension. */
+#if defined(LUAJIT_DISABLE_FFI) || defined(LJ_ARCH_NOFFI)
+#define LJ_HASFFI		0
+#else
+#define LJ_HASFFI		1
+#endif
+
+#ifndef LJ_ARCH_HASFPU
+#define LJ_ARCH_HASFPU		1
+#endif
+#ifndef LJ_ABI_SOFTFP
+#define LJ_ABI_SOFTFP		0
+#endif
+#define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
+
+#if LJ_ARCH_ENDIAN == LUAJIT_BE
+#define LJ_LE			0
+#define LJ_BE			1
+#define LJ_ENDIAN_SELECT(le, be)	be
+#define LJ_ENDIAN_LOHI(lo, hi)		hi lo
+#else
+#define LJ_LE			1
+#define LJ_BE			0
+#define LJ_ENDIAN_SELECT(le, be)	le
+#define LJ_ENDIAN_LOHI(lo, hi)		lo hi
+#endif
+
+#if LJ_ARCH_BITS == 32
+#define LJ_32			1
+#define LJ_64			0
+#else
+#define LJ_32			0
+#define LJ_64			1
+#endif
+
+#ifndef LJ_TARGET_UNALIGNED
+#define LJ_TARGET_UNALIGNED	0
+#endif
+
+/* Various workarounds for embedded operating systems. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+#define LUAJIT_NO_LOG2
+#endif
+#if defined(__symbian__)
+#define LUAJIT_NO_EXP2
+#endif
+
+#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3
+#define LJ_NO_UNWIND		1
+#endif
+
+/* Compatibility with Lua 5.1 vs. 5.2. */
+#ifdef LUAJIT_ENABLE_LUA52COMPAT
+#define LJ_52			1
+#else
+#define LJ_52			0
+#endif
+
+#endif

+ 1920 - 0
luajit.mod/luajit/src/lj_asm.c

@@ -0,0 +1,1920 @@
+/*
+** IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_asm_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_frame.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_ircall.h"
+#include "lj_iropt.h"
+#include "lj_mcode.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_snap.h"
+#include "lj_asm.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_target.h"
+
+#ifdef LUA_USE_ASSERT
+#include <stdio.h>
+#endif
+
+/* -- Assembler state and common macros ----------------------------------- */
+
+/* Assembler state. */
+typedef struct ASMState {
+  RegCost cost[RID_MAX];  /* Reference and blended allocation cost for regs. */
+
+  MCode *mcp;		/* Current MCode pointer (grows down). */
+  MCode *mclim;		/* Lower limit for MCode memory + red zone. */
+#ifdef LUA_USE_ASSERT
+  MCode *mcp_prev;	/* Red zone overflow check. */
+#endif
+
+  IRIns *ir;		/* Copy of pointer to IR instructions/constants. */
+  jit_State *J;		/* JIT compiler state. */
+
+#if LJ_TARGET_X86ORX64
+  x86ModRM mrm;		/* Fused x86 address operand. */
+#endif
+
+  RegSet freeset;	/* Set of free registers. */
+  RegSet modset;	/* Set of registers modified inside the loop. */
+  RegSet weakset;	/* Set of weakly referenced registers. */
+  RegSet phiset;	/* Set of PHI registers. */
+
+  uint32_t flags;	/* Copy of JIT compiler flags. */
+  int loopinv;		/* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
+
+  int32_t evenspill;	/* Next even spill slot. */
+  int32_t oddspill;	/* Next odd spill slot (or 0). */
+
+  IRRef curins;		/* Reference of current instruction. */
+  IRRef stopins;	/* Stop assembly before hitting this instruction. */
+  IRRef orignins;	/* Original T->nins. */
+
+  IRRef snapref;	/* Current snapshot is active after this reference. */
+  IRRef snaprename;	/* Rename highwater mark for snapshot check. */
+  SnapNo snapno;	/* Current snapshot number. */
+  SnapNo loopsnapno;	/* Loop snapshot number. */
+
+  IRRef fuseref;	/* Fusion limit (loopref, 0 or FUSE_DISABLED). */
+  IRRef sectref;	/* Section base reference (loopref or 0). */
+  IRRef loopref;	/* Reference of LOOP instruction (or 0). */
+
+  BCReg topslot;	/* Number of slots for stack check (unless 0). */
+  int32_t gcsteps;	/* Accumulated number of GC steps (per section). */
+
+  GCtrace *T;		/* Trace to assemble. */
+  GCtrace *parent;	/* Parent trace (or NULL). */
+
+  MCode *mcbot;		/* Bottom of reserved MCode. */
+  MCode *mctop;		/* Top of generated MCode. */
+  MCode *mcloop;	/* Pointer to loop MCode (or NULL). */
+  MCode *invmcp;	/* Points to invertible loop branch (or NULL). */
+  MCode *flagmcp;	/* Pending opportunity to merge flag setting ins. */
+  MCode *realign;	/* Realign loop if not NULL. */
+
+#ifdef RID_NUM_KREF
+  int32_t krefk[RID_NUM_KREF];
+#endif
+  IRRef1 phireg[RID_MAX];  /* PHI register references. */
+  uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
+} ASMState;
+
+#define IR(ref)			(&as->ir[(ref)])
+
+#define ASMREF_TMP1		REF_TRUE	/* Temp. register. */
+#define ASMREF_TMP2		REF_FALSE	/* Temp. register. */
+#define ASMREF_L		REF_NIL		/* Stores register for L. */
+
+/* Check for variant to invariant references. */
+#define iscrossref(as, ref)	((ref) < as->sectref)
+
+/* Inhibit memory op fusion from variant to invariant references. */
+#define FUSE_DISABLED		(~(IRRef)0)
+#define mayfuse(as, ref)	((ref) > as->fuseref)
+#define neverfuse(as)		(as->fuseref == FUSE_DISABLED)
+#define canfuse(as, ir)		(!neverfuse(as) && !irt_isphi((ir)->t))
+#define opisfusableload(o) \
+  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
+   (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)
+
+/* Sparse limit checks using a red zone before the actual limit. */
+#define MCLIM_REDZONE	64
+
+static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
+{
+  lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
+}
+
+static LJ_AINLINE void checkmclim(ASMState *as)
+{
+#ifdef LUA_USE_ASSERT
+  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
+    IRIns *ir = IR(as->curins+1);
+    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d  %02d %04d %04d\n", as->mcp,
+	    as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+    lua_assert(0);
+  }
+#endif
+  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
+#ifdef LUA_USE_ASSERT
+  as->mcp_prev = as->mcp;
+#endif
+}
+
+#ifdef RID_NUM_KREF
+#define ra_iskref(ref)		((ref) < RID_NUM_KREF)
+#define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
+#define ra_krefk(as, ref)	(as->krefk[(ref)])
+
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+{
+  IRRef ref = (IRRef)(r - RID_MIN_KREF);
+  as->krefk[ref] = k;
+  as->cost[r] = REGCOST(ref, ref);
+}
+
+#else
+#define ra_iskref(ref)		0
+#define ra_krefreg(ref)		RID_MIN_GPR
+#define ra_krefk(as, ref)	0
+#endif
+
+/* Arch-specific field offsets. */
+static const uint8_t field_ofs[IRFL__MAX+1] = {
+#define FLOFS(name, ofs)	(uint8_t)(ofs),
+IRFLDEF(FLOFS)
+#undef FLOFS
+  0
+};
+
+/* -- Target-specific instruction emitter --------------------------------- */
+
+#if LJ_TARGET_X86ORX64
+#include "lj_emit_x86.h"
+#elif LJ_TARGET_ARM
+#include "lj_emit_arm.h"
+#elif LJ_TARGET_PPC
+#include "lj_emit_ppc.h"
+#elif LJ_TARGET_MIPS
+#include "lj_emit_mips.h"
+#else
+#error "Missing instruction emitter for target CPU"
+#endif
+
+/* -- Register allocator debugging ---------------------------------------- */
+
+/* #define LUAJIT_DEBUG_RA */
+
+#ifdef LUAJIT_DEBUG_RA
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#define RIDNAME(name)	#name,
+static const char *const ra_regname[] = {
+  GPRDEF(RIDNAME)
+  FPRDEF(RIDNAME)
+  VRIDDEF(RIDNAME)
+  NULL
+};
+#undef RIDNAME
+
+static char ra_dbg_buf[65536];
+static char *ra_dbg_p;
+static char *ra_dbg_merge;
+static MCode *ra_dbg_mcp;
+
+static void ra_dstart(void)
+{
+  ra_dbg_p = ra_dbg_buf;
+  ra_dbg_merge = NULL;
+  ra_dbg_mcp = NULL;
+}
+
+static void ra_dflush(void)
+{
+  fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
+  ra_dstart();
+}
+
+static void ra_dprintf(ASMState *as, const char *fmt, ...)
+{
+  char *p;
+  va_list argp;
+  va_start(argp, fmt);
+  p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
+  ra_dbg_mcp = NULL;
+  p += sprintf(p, "%08x  \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
+  for (;;) {
+    const char *e = strchr(fmt, '$');
+    if (e == NULL) break;
+    memcpy(p, fmt, (size_t)(e-fmt));
+    p += e-fmt;
+    if (e[1] == 'r') {
+      Reg r = va_arg(argp, Reg) & RID_MASK;
+      if (r <= RID_MAX) {
+	const char *q;
+	for (q = ra_regname[r]; *q; q++)
+	  *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
+      } else {
+	*p++ = '?';
+	lua_assert(0);
+      }
+    } else if (e[1] == 'f' || e[1] == 'i') {
+      IRRef ref;
+      if (e[1] == 'f')
+	ref = va_arg(argp, IRRef);
+      else
+	ref = va_arg(argp, IRIns *) - as->ir;
+      if (ref >= REF_BIAS)
+	p += sprintf(p, "%04d", ref - REF_BIAS);
+      else
+	p += sprintf(p, "K%03d", REF_BIAS - ref);
+    } else if (e[1] == 's') {
+      uint32_t slot = va_arg(argp, uint32_t);
+      p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
+    } else if (e[1] == 'x') {
+      p += sprintf(p, "%08x", va_arg(argp, int32_t));
+    } else {
+      lua_assert(0);
+    }
+    fmt = e+2;
+  }
+  va_end(argp);
+  while (*fmt)
+    *p++ = *fmt++;
+  *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
+  if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
+    fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
+    p = ra_dbg_buf;
+  }
+  ra_dbg_p = p;
+}
+
+#define RA_DBG_START()	ra_dstart()
+#define RA_DBG_FLUSH()	ra_dflush()
+#define RA_DBG_REF() \
+  do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
+       ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
+#define RA_DBGX(x)	ra_dprintf x
+
+#else
+#define RA_DBG_START()	((void)0)
+#define RA_DBG_FLUSH()	((void)0)
+#define RA_DBG_REF()	((void)0)
+#define RA_DBGX(x)	((void)0)
+#endif
+
+/* -- Register allocator -------------------------------------------------- */
+
+#define ra_free(as, r)		rset_set(as->freeset, (r))
+#define ra_modified(as, r)	rset_set(as->modset, (r))
+#define ra_weak(as, r)		rset_set(as->weakset, (r))
+#define ra_noweak(as, r)	rset_clear(as->weakset, (r))
+
+#define ra_used(ir)		(ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
+
+/* Setup register allocator. */
+static void ra_setup(ASMState *as)
+{
+  Reg r;
+  /* Initially all regs (except the stack pointer) are free for use. */
+  as->freeset = RSET_INIT;
+  as->modset = RSET_EMPTY;
+  as->weakset = RSET_EMPTY;
+  as->phiset = RSET_EMPTY;
+  memset(as->phireg, 0, sizeof(as->phireg));
+  for (r = RID_MIN_GPR; r < RID_MAX; r++)
+    as->cost[r] = REGCOST(~0u, 0u);
+}
+
+/* Rematerialize constants. */
+static Reg ra_rematk(ASMState *as, IRRef ref)
+{
+  IRIns *ir;
+  Reg r;
+  if (ra_iskref(ref)) {
+    r = ra_krefreg(ref);
+    lua_assert(!rset_test(as->freeset, r));
+    ra_free(as, r);
+    ra_modified(as, r);
+    emit_loadi(as, r, ra_krefk(as, ref));
+    return r;
+  }
+  ir = IR(ref);
+  r = ir->r;
+  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  ra_free(as, r);
+  ra_modified(as, r);
+  ir->r = RID_INIT;  /* Do not keep any hint. */
+  RA_DBGX((as, "remat     $i $r", ir, r));
+#if !LJ_SOFTFP
+  if (ir->o == IR_KNUM) {
+    emit_loadn(as, r, ir_knum(ir));
+  } else
+#endif
+  if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
+    ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
+    emit_getgl(as, r, jit_base);
+  } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
+    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
+    emit_getgl(as, r, jit_L);
+#if LJ_64
+  } else if (ir->o == IR_KINT64) {
+    emit_loadu64(as, r, ir_kint64(ir)->u64);
+#endif
+  } else {
+    lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+	       ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+    emit_loadi(as, r, ir->i);
+  }
+  return r;
+}
+
+/* Force a spill. Allocate a new spill slot if needed. */
+static int32_t ra_spill(ASMState *as, IRIns *ir)
+{
+  int32_t slot = ir->s;
+  lua_assert(ir >= as->ir + REF_TRUE);
+  if (!ra_hasspill(slot)) {
+    if (irt_is64(ir->t)) {
+      slot = as->evenspill;
+      as->evenspill += 2;
+    } else if (as->oddspill) {
+      slot = as->oddspill;
+      as->oddspill = 0;
+    } else {
+      slot = as->evenspill;
+      as->oddspill = slot+1;
+      as->evenspill += 2;
+    }
+    if (as->evenspill > 256)
+      lj_trace_err(as->J, LJ_TRERR_SPILLOV);
+    ir->s = (uint8_t)slot;
+  }
+  return sps_scale(slot);
+}
+
+/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
+static Reg ra_releasetmp(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  Reg r = ir->r;
+  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  ra_free(as, r);
+  ra_modified(as, r);
+  ir->r = RID_INIT;
+  return r;
+}
+
+/* Restore a register (marked as free). Rematerialize or force a spill. */
+static Reg ra_restore(ASMState *as, IRRef ref)
+{
+  if (emit_canremat(ref)) {
+    return ra_rematk(as, ref);
+  } else {
+    IRIns *ir = IR(ref);
+    int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
+    Reg r = ir->r;
+    lua_assert(ra_hasreg(r));
+    ra_sethint(ir->r, r);  /* Keep hint. */
+    ra_free(as, r);
+    if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
+      ra_modified(as, r);
+      RA_DBGX((as, "restore   $i $r", ir, r));
+      emit_spload(as, ir, r, ofs);
+    }
+    return r;
+  }
+}
+
+/* Save a register to a spill slot. */
+static void ra_save(ASMState *as, IRIns *ir, Reg r)
+{
+  RA_DBGX((as, "save      $i $r", ir, r));
+  emit_spstore(as, ir, r, sps_scale(ir->s));
+}
+
+#define MINCOST(name) \
+  if (rset_test(RSET_ALL, RID_##name) && \
+      LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
+    cost = as->cost[RID_##name];
+
+/* Evict the register with the lowest cost, forcing a restore. */
+static Reg ra_evict(ASMState *as, RegSet allow)
+{
+  IRRef ref;
+  RegCost cost = ~(RegCost)0;
+  lua_assert(allow != RSET_EMPTY);
+  if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
+    GPRDEF(MINCOST)
+  } else {
+    FPRDEF(MINCOST)
+  }
+  ref = regcost_ref(cost);
+  lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
+  /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
+  if (!irref_isk(ref) && (as->weakset & allow)) {
+    IRIns *ir = IR(ref);
+    if (!rset_test(as->weakset, ir->r))
+      ref = regcost_ref(as->cost[rset_pickbot((as->weakset & allow))]);
+  }
+  return ra_restore(as, ref);
+}
+
+/* Pick any register (marked as free). Evict on-demand. */
+static Reg ra_pick(ASMState *as, RegSet allow)
+{
+  RegSet pick = as->freeset & allow;
+  if (!pick)
+    return ra_evict(as, allow);
+  else
+    return rset_picktop(pick);
+}
+
+/* Get a scratch register (marked as free). */
+static Reg ra_scratch(ASMState *as, RegSet allow)
+{
+  Reg r = ra_pick(as, allow);
+  ra_modified(as, r);
+  RA_DBGX((as, "scratch        $r", r));
+  return r;
+}
+
+/* Evict all registers from a set (if not free). */
+static void ra_evictset(ASMState *as, RegSet drop)
+{
+  RegSet work;
+  as->modset |= drop;
+#if !LJ_SOFTFP
+  work = (drop & ~as->freeset) & RSET_FPR;
+  while (work) {
+    Reg r = rset_pickbot(work);
+    ra_restore(as, regcost_ref(as->cost[r]));
+    rset_clear(work, r);
+    checkmclim(as);
+  }
+#endif
+  work = (drop & ~as->freeset);
+  while (work) {
+    Reg r = rset_pickbot(work);
+    ra_restore(as, regcost_ref(as->cost[r]));
+    rset_clear(work, r);
+    checkmclim(as);
+  }
+}
+
+/* Evict (rematerialize) all registers allocated to constants. */
+static void ra_evictk(ASMState *as)
+{
+  RegSet work;
+#if !LJ_SOFTFP
+  work = ~as->freeset & RSET_FPR;
+  while (work) {
+    Reg r = rset_pickbot(work);
+    IRRef ref = regcost_ref(as->cost[r]);
+    if (emit_canremat(ref) && irref_isk(ref)) {
+      ra_rematk(as, ref);
+      checkmclim(as);
+    }
+    rset_clear(work, r);
+  }
+#endif
+  work = ~as->freeset & RSET_GPR;
+  while (work) {
+    Reg r = rset_pickbot(work);
+    IRRef ref = regcost_ref(as->cost[r]);
+    if (emit_canremat(ref) && irref_isk(ref)) {
+      ra_rematk(as, ref);
+      checkmclim(as);
+    }
+    rset_clear(work, r);
+  }
+}
+
+#ifdef RID_NUM_KREF
+/* Allocate a register for a constant. */
+static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+{
+  /* First try to find a register which already holds the same constant. */
+  RegSet pick, work = ~as->freeset & RSET_GPR;
+  Reg r;
+  while (work) {
+    IRRef ref;
+    r = rset_pickbot(work);
+    ref = regcost_ref(as->cost[r]);
+    if (ref < ASMREF_L &&
+	k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
+      return r;
+    rset_clear(work, r);
+  }
+  pick = as->freeset & allow;
+  if (pick) {
+    /* Constants should preferably get unmodified registers. */
+    if ((pick & ~as->modset))
+      pick &= ~as->modset;
+    r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
+  } else {
+    r = ra_evict(as, allow);
+  }
+  RA_DBGX((as, "allock    $x $r", k, r));
+  ra_setkref(as, r, k);
+  rset_clear(as->freeset, r);
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate a specific register for a constant. */
+static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+{
+  Reg kr = ra_allock(as, k, RID2RSET(r));
+  if (kr != r) {
+    IRIns irdummy;
+    irdummy.t.irt = IRT_INT;
+    ra_scratch(as, RID2RSET(r));
+    emit_movrr(as, &irdummy, r, kr);
+  }
+}
+#else
+#define ra_allockreg(as, k, r)		emit_loadi(as, (r), (k))
+#endif
+
+/* Allocate a register for ref from the allowed set of registers.
+** Note: this function assumes the ref does NOT have a register yet!
+** Picks an optimal register, sets the cost and marks the register as non-free.
+*/
+static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  RegSet pick = as->freeset & allow;
+  Reg r;
+  lua_assert(ra_noreg(ir->r));
+  if (pick) {
+    /* First check register hint from propagation or PHI. */
+    if (ra_hashint(ir->r)) {
+      r = ra_gethint(ir->r);
+      if (rset_test(pick, r))  /* Use hint register if possible. */
+	goto found;
+      /* Rematerialization is cheaper than missing a hint. */
+      if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) {
+	ra_rematk(as, regcost_ref(as->cost[r]));
+	goto found;
+      }
+      RA_DBGX((as, "hintmiss  $f $r", ref, r));
+    }
+    /* Invariants should preferably get unmodified registers. */
+    if (ref < as->loopref && !irt_isphi(ir->t)) {
+      if ((pick & ~as->modset))
+	pick &= ~as->modset;
+      r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
+    } else {
+      /* We've got plenty of regs, so get callee-save regs if possible. */
+      if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH))
+	pick &= ~RSET_SCRATCH;
+      r = rset_picktop(pick);
+    }
+  } else {
+    r = ra_evict(as, allow);
+  }
+found:
+  RA_DBGX((as, "alloc     $f $r", ref, r));
+  ir->r = (uint8_t)r;
+  rset_clear(as->freeset, r);
+  ra_noweak(as, r);
+  as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
+  return r;
+}
+
+/* Allocate a register on-demand. */
+static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  /* Note: allow is ignored if the register is already allocated. */
+  if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Rename register allocation and emit move. */
+static void ra_rename(ASMState *as, Reg down, Reg up)
+{
+  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRIns *ir = IR(ref);
+  ir->r = (uint8_t)up;
+  as->cost[down] = 0;
+  lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
+  lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+  ra_free(as, down);  /* 'down' is free ... */
+  ra_modified(as, down);
+  rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
+  ra_noweak(as, up);
+  RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
+  emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
+  if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
+    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
+    ren = tref_ref(lj_ir_emit(as->J));
+    as->ir = as->T->ir;  /* The IR may have been reallocated. */
+    IR(ren)->r = (uint8_t)down;
+    IR(ren)->s = SPS_NONE;
+  }
+}
+
+/* Pick a destination register (marked as free).
+** Caveat: allow is ignored if there's already a destination register.
+** Use ra_destreg() to get a specific register.
+*/
+static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
+{
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+  } else {
+    if (ra_hashint(dest) && rset_test((as->freeset&allow), ra_gethint(dest))) {
+      dest = ra_gethint(dest);
+      ra_modified(as, dest);
+      RA_DBGX((as, "dest           $r", dest));
+    } else {
+      dest = ra_scratch(as, allow);
+    }
+    ir->r = dest;
+  }
+  if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
+  return dest;
+}
+
+/* Force a specific destination register (marked as free). */
+static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
+{
+  Reg dest = ra_dest(as, ir, RID2RSET(r));
+  if (dest != r) {
+    lua_assert(rset_test(as->freeset, r));
+    ra_modified(as, r);
+    emit_movrr(as, ir, dest, r);
+  }
+}
+
+#if LJ_TARGET_X86ORX64
+/* Propagate dest register to left reference. Emit moves as needed.
+** This is a required fixup step for all 2-operand machine instructions.
+*/
+static void ra_left(ASMState *as, Reg dest, IRRef lref)
+{
+  IRIns *ir = IR(lref);
+  Reg left = ir->r;
+  if (ra_noreg(left)) {
+    if (irref_isk(lref)) {
+      if (ir->o == IR_KNUM) {
+	cTValue *tv = ir_knum(ir);
+	/* FP remat needs a load except for +0. Still better than eviction. */
+	if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
+	  emit_loadn(as, dest, tv);
+	  return;
+	}
+#if LJ_64
+      } else if (ir->o == IR_KINT64) {
+	emit_loadu64(as, dest, ir_kint64(ir)->u64);
+	return;
+#endif
+      } else {
+	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+	emit_loadi(as, dest, ir->i);
+	return;
+      }
+    }
+    if (!ra_hashint(left) && !iscrossref(as, lref))
+      ra_sethint(ir->r, dest);  /* Propagate register hint. */
+    left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
+  }
+  ra_noweak(as, left);
+  /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
+  if (dest != left) {
+    /* Use register renaming if dest is the PHI reg. */
+    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
+      ra_modified(as, left);
+      ra_rename(as, left, dest);
+    } else {
+      emit_movrr(as, ir, dest, left);
+    }
+  }
+}
+#else
+/* Similar to ra_left, except we override any hints. */
+static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
+{
+  IRIns *ir = IR(lref);
+  Reg left = ir->r;
+  if (ra_noreg(left)) {
+    ra_sethint(ir->r, dest);  /* Propagate register hint. */
+    left = ra_allocref(as, lref,
+		       (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR);
+  }
+  ra_noweak(as, left);
+  if (dest != left) {
+    /* Use register renaming if dest is the PHI reg. */
+    if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
+      ra_modified(as, left);
+      ra_rename(as, left, dest);
+    } else {
+      emit_movrr(as, ir, dest, left);
+    }
+  }
+}
+#endif
+
+#if !LJ_64
+/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
+static void ra_destpair(ASMState *as, IRIns *ir)
+{
+  Reg destlo = ir->r, desthi = (ir+1)->r;
+  /* First spill unrelated refs blocking the destination registers. */
+  if (!rset_test(as->freeset, RID_RETLO) &&
+      destlo != RID_RETLO && desthi != RID_RETLO)
+    ra_restore(as, regcost_ref(as->cost[RID_RETLO]));
+  if (!rset_test(as->freeset, RID_RETHI) &&
+      destlo != RID_RETHI && desthi != RID_RETHI)
+    ra_restore(as, regcost_ref(as->cost[RID_RETHI]));
+  /* Next free the destination registers (if any). */
+  if (ra_hasreg(destlo)) {
+    ra_free(as, destlo);
+    ra_modified(as, destlo);
+  } else {
+    destlo = RID_RETLO;
+  }
+  if (ra_hasreg(desthi)) {
+    ra_free(as, desthi);
+    ra_modified(as, desthi);
+  } else {
+    desthi = RID_RETHI;
+  }
+  /* Check for conflicts and shuffle the registers as needed. */
+  if (destlo == RID_RETHI) {
+    if (desthi == RID_RETLO) {
+#if LJ_TARGET_X86
+      *--as->mcp = XI_XCHGa + RID_RETHI;
+#else
+      emit_movrr(as, ir, RID_RETHI, RID_TMP);
+      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
+      emit_movrr(as, ir, RID_TMP, RID_RETLO);
+#endif
+    } else {
+      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
+      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+    }
+  } else if (desthi == RID_RETLO) {
+    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+  } else {
+    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+  }
+  /* Restore spill slots (if any). */
+  if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
+  if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
+}
+#endif
+
+/* -- Snapshot handling --------- ----------------------------------------- */
+
+/* Can we rematerialize a KNUM instead of forcing a spill? */
+static int asm_snap_canremat(ASMState *as)
+{
+  Reg r;
+  for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
+    if (irref_isk(regcost_ref(as->cost[r])))
+      return 1;
+  return 0;
+}
+
+/* Check whether a sunk store corresponds to an allocation. */
+static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
+{
+  if (irs->s == 255) {
+    if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+	irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
+      IRIns *irk = IR(irs->op1);
+      if (irk->o == IR_AREF || irk->o == IR_HREFK)
+	irk = IR(irk->op1);
+      return (IR(irk->op1) == ira);
+    }
+    return 0;
+  } else {
+    return (ira + irs->s == irs);  /* Quick check. */
+  }
+}
+
+/* Allocate register or spill slot for a ref that escapes to a snapshot. */
+static void asm_snap_alloc1(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) {
+    if (ir->r == RID_SINK) {
+      ir->r = RID_SUNK;
+#if LJ_HASFFI
+      if (ir->o == IR_CNEWI) {  /* Allocate CNEWI value. */
+	asm_snap_alloc1(as, ir->op2);
+	if (LJ_32 && (ir+1)->o == IR_HIOP)
+	  asm_snap_alloc1(as, (ir+1)->op2);
+      } else
+#endif
+      {  /* Allocate stored values for TNEW, TDUP and CNEW. */
+	IRIns *irs;
+	lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
+	for (irs = IR(as->snapref-1); irs > ir; irs--)
+	  if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
+	    lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+		       irs->o == IR_FSTORE || irs->o == IR_XSTORE);
+	    asm_snap_alloc1(as, irs->op2);
+	    if (LJ_32 && (irs+1)->o == IR_HIOP)
+	      asm_snap_alloc1(as, (irs+1)->op2);
+	  }
+      }
+    } else {
+      RegSet allow;
+      if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
+	IRIns *irc;
+	for (irc = IR(as->curins); irc > ir; irc--)
+	  if ((irc->op1 == ref || irc->op2 == ref) &&
+	      !(irc->r == RID_SINK || irc->r == RID_SUNK))
+	    goto nosink;  /* Don't sink conversion if result is used. */
+	asm_snap_alloc1(as, ir->op1);
+	return;
+      }
+    nosink:
+      allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
+      if ((as->freeset & allow) ||
+	       (allow == RSET_FPR && asm_snap_canremat(as))) {
+	/* Get a weak register if we have a free one or can rematerialize. */
+	Reg r = ra_allocref(as, ref, allow);  /* Allocate a register. */
+	if (!irt_isphi(ir->t))
+	  ra_weak(as, r);  /* But mark it as weakly referenced. */
+	checkmclim(as);
+	RA_DBGX((as, "snapreg   $f $r", ref, ir->r));
+      } else {
+	ra_spill(as, ir);  /* Otherwise force a spill slot. */
+	RA_DBGX((as, "snapspill $f $s", ref, ir->s));
+      }
+    }
+  }
+}
+
+/* Allocate refs escaping to a snapshot. */
+static void asm_snap_alloc(ASMState *as)
+{
+  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  MSize n, nent = snap->nent;
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    IRRef ref = snap_ref(sn);
+    if (!irref_isk(ref)) {
+      asm_snap_alloc1(as, ref);
+      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
+	lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
+	asm_snap_alloc1(as, ref+1);
+      }
+    }
+  }
+}
+
+/* All guards for a snapshot use the same exitno. This is currently the
+** same as the snapshot number. Since the exact origin of the exit cannot
+** be determined, all guards for the same snapshot must exit with the same
+** RegSP mapping.
+** A renamed ref which has been used in a prior guard for the same snapshot
+** would cause an inconsistency. The easy way out is to force a spill slot.
+*/
+static int asm_snap_checkrename(ASMState *as, IRRef ren)
+{
+  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  MSize n, nent = snap->nent;
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    IRRef ref = snap_ref(sn);
+    if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) {
+      IRIns *ir = IR(ref);
+      ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
+      RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
+      return 1;  /* Found. */
+    }
+  }
+  return 0;  /* Not found. */
+}
+
+/* Prepare snapshot for next guard instruction. */
+static void asm_snap_prep(ASMState *as)
+{
+  if (as->curins < as->snapref) {
+    do {
+      if (as->snapno == 0) return;  /* Called by sunk stores before snap #0. */
+      as->snapno--;
+      as->snapref = as->T->snap[as->snapno].ref;
+    } while (as->curins < as->snapref);
+    asm_snap_alloc(as);
+    as->snaprename = as->T->nins;
+  } else {
+    /* Process any renames above the highwater mark. */
+    for (; as->snaprename < as->T->nins; as->snaprename++) {
+      IRIns *ir = IR(as->snaprename);
+      if (asm_snap_checkrename(as, ir->op1))
+	ir->op2 = REF_BIAS-1;  /* Kill rename. */
+    }
+  }
+}
+
+/* -- Miscellaneous helpers ----------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_NARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+/* Calculate stack adjustment. */
+static int32_t asm_stack_adjust(ASMState *as)
+{
+  if (as->evenspill <= SPS_FIXED)
+    return 0;
+  return sps_scale(sps_align(as->evenspill));
+}
+
+/* Must match with hash*() in lj_tab.c. */
+static uint32_t ir_khash(IRIns *ir)
+{
+  uint32_t lo, hi;
+  if (irt_isstr(ir->t)) {
+    return ir_kstr(ir)->hash;
+  } else if (irt_isnum(ir->t)) {
+    lo = ir_knum(ir)->u32.lo;
+    hi = ir_knum(ir)->u32.hi << 1;
+  } else if (irt_ispri(ir->t)) {
+    lua_assert(!irt_isnil(ir->t));
+    return irt_type(ir->t)-IRT_FALSE;
+  } else {
+    lua_assert(irt_isgcv(ir->t));
+    lo = u32ptr(ir_kgc(ir));
+    hi = lo + HASH_BIAS;
+  }
+  return hashrot(lo, hi);
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args);
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci);
+
+static void asm_snew(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+  IRRef args[3];
+  args[0] = ASMREF_L;  /* lua_State *L    */
+  args[1] = ir->op1;   /* const char *str */
+  args[2] = ir->op2;   /* size_t len      */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+static void asm_tnew(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+  IRRef args[2];
+  args[0] = ASMREF_L;     /* lua_State *L    */
+  args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCtab * */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1));
+}
+
+static void asm_tdup(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+  IRRef args[2];
+  args[0] = ASMREF_L;  /* lua_State *L    */
+  args[1] = ir->op1;   /* const GCtab *kt */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCtab * */
+  asm_gencall(as, ci, args);
+}
+
+static void asm_gc_check(ASMState *as);
+
+/* Explicit GC step. */
+static void asm_gcstep(ASMState *as, IRIns *ir)
+{
+  IRIns *ira;
+  for (ira = IR(as->stopins+1); ira < ir; ira++)
+    if ((ira->o == IR_TNEW || ira->o == IR_TDUP ||
+	 (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) &&
+	ra_used(ira))
+      as->gcsteps++;
+  if (as->gcsteps)
+    asm_gc_check(as);
+  as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
+}
+
+/* -- PHI and loop handling ----------------------------------------------- */
+
+/* Break a PHI cycle by renaming to a free register (evict if needed). */
+static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
+			  RegSet allow)
+{
+  RegSet candidates = blocked & allow;
+  if (candidates) {  /* If this register file has candidates. */
+    /* Note: the set for ra_pick cannot be empty, since each register file
+    ** has some registers never allocated to PHIs.
+    */
+    Reg down, up = ra_pick(as, ~blocked & allow);  /* Get a free register. */
+    if (candidates & ~blockedby)  /* Optimize shifts, else it's a cycle. */
+      candidates = candidates & ~blockedby;
+    down = rset_picktop(candidates);  /* Pick candidate PHI register. */
+    ra_rename(as, down, up);  /* And rename it to the free register. */
+  }
+}
+
+/* PHI register shuffling.
+**
+** The allocator tries hard to preserve PHI register assignments across
+** the loop body. Most of the time this loop does nothing, since there
+** are no register mismatches.
+**
+** If a register mismatch is detected and ...
+** - the register is currently free: rename it.
+** - the register is blocked by an invariant: restore/remat and rename it.
+** - Otherwise the register is used by another PHI, so mark it as blocked.
+**
+** The renames are order-sensitive, so just retry the loop if a register
+** is marked as blocked, but has been freed in the meantime. A cycle is
+** detected if all of the blocked registers are allocated. To break the
+** cycle rename one of them to a free register and retry.
+**
+** Note that PHI spill slots are kept in sync and don't need to be shuffled.
+*/
+static void asm_phi_shuffle(ASMState *as)
+{
+  RegSet work;
+
+  /* Find and resolve PHI register mismatches. */
+  for (;;) {
+    RegSet blocked = RSET_EMPTY;
+    RegSet blockedby = RSET_EMPTY;
+    RegSet phiset = as->phiset;
+    while (phiset) {  /* Check all left PHI operand registers. */
+      Reg r = rset_pickbot(phiset);
+      IRIns *irl = IR(as->phireg[r]);
+      Reg left = irl->r;
+      if (r != left) {  /* Mismatch? */
+	if (!rset_test(as->freeset, r)) {  /* PHI register blocked? */
+	  IRRef ref = regcost_ref(as->cost[r]);
+	  /* Blocked by other PHI (w/reg)? */
+	  if (!ra_iskref(ref) && irt_ismarked(IR(ref)->t)) {
+	    rset_set(blocked, r);
+	    if (ra_hasreg(left))
+	      rset_set(blockedby, left);
+	    left = RID_NONE;
+	  } else {  /* Otherwise grab register from invariant. */
+	    ra_restore(as, ref);
+	    checkmclim(as);
+	  }
+	}
+	if (ra_hasreg(left)) {
+	  ra_rename(as, left, r);
+	  checkmclim(as);
+	}
+      }
+      rset_clear(phiset, r);
+    }
+    if (!blocked) break;  /* Finished. */
+    if (!(as->freeset & blocked)) {  /* Break cycles if none are free. */
+      asm_phi_break(as, blocked, blockedby, RSET_GPR);
+      if (!LJ_SOFTFP) asm_phi_break(as, blocked, blockedby, RSET_FPR);
+      checkmclim(as);
+    }  /* Else retry some more renames. */
+  }
+
+  /* Restore/remat invariants whose registers are modified inside the loop. */
+#if !LJ_SOFTFP
+  work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR;
+  while (work) {
+    Reg r = rset_pickbot(work);
+    ra_restore(as, regcost_ref(as->cost[r]));
+    rset_clear(work, r);
+    checkmclim(as);
+  }
+#endif
+  work = as->modset & ~(as->freeset | as->phiset);
+  while (work) {
+    Reg r = rset_pickbot(work);
+    ra_restore(as, regcost_ref(as->cost[r]));
+    rset_clear(work, r);
+    checkmclim(as);
+  }
+
+  /* Allocate and save all unsaved PHI regs and clear marks. */
+  work = as->phiset;
+  while (work) {
+    Reg r = rset_picktop(work);
+    IRRef lref = as->phireg[r];
+    IRIns *ir = IR(lref);
+    if (ra_hasspill(ir->s)) {  /* Left PHI gained a spill slot? */
+      irt_clearmark(ir->t);  /* Handled here, so clear marker now. */
+      ra_alloc1(as, lref, RID2RSET(r));
+      ra_save(as, ir, r);  /* Save to spill slot inside the loop. */
+      checkmclim(as);
+    }
+    rset_clear(work, r);
+  }
+}
+
+/* Copy unsynced left/right PHI spill slots. Rarely needed. */
+static void asm_phi_copyspill(ASMState *as)
+{
+  int need = 0;
+  IRIns *ir;
+  for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--)
+    if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s))
+      need |= irt_isfp(ir->t) ? 2 : 1;  /* Unsynced spill slot? */
+  if ((need & 1)) {  /* Copy integer spill slots. */
+#if !LJ_TARGET_X86ORX64
+    Reg r = RID_TMP;
+#else
+    Reg r = RID_RET;
+    if ((as->freeset & RSET_GPR))
+      r = rset_pickbot((as->freeset & RSET_GPR));
+    else
+      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+#endif
+    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
+      if (ra_hasspill(ir->s)) {
+	IRIns *irl = IR(ir->op1);
+	if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
+	  emit_spstore(as, irl, r, sps_scale(irl->s));
+	  emit_spload(as, ir, r, sps_scale(ir->s));
+	  checkmclim(as);
+	}
+      }
+    }
+#if LJ_TARGET_X86ORX64
+    if (!rset_test(as->freeset, r))
+      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+#endif
+  }
+#if !LJ_SOFTFP
+  if ((need & 2)) {  /* Copy FP spill slots. */
+#if LJ_TARGET_X86
+    Reg r = RID_XMM0;
+#else
+    Reg r = RID_FPRET;
+#endif
+    if ((as->freeset & RSET_FPR))
+      r = rset_pickbot((as->freeset & RSET_FPR));
+    if (!rset_test(as->freeset, r))
+      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
+      if (ra_hasspill(ir->s)) {
+	IRIns *irl = IR(ir->op1);
+	if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
+	  emit_spstore(as, irl, r, sps_scale(irl->s));
+	  emit_spload(as, ir, r, sps_scale(ir->s));
+	  checkmclim(as);
+	}
+      }
+    }
+    if (!rset_test(as->freeset, r))
+      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+  }
+#endif
+}
+
+/* Emit renames for left PHIs which are only spilled outside the loop. */
+static void asm_phi_fixup(ASMState *as)
+{
+  RegSet work = as->phiset;
+  while (work) {
+    Reg r = rset_picktop(work);
+    IRRef lref = as->phireg[r];
+    IRIns *ir = IR(lref);
+    if (irt_ismarked(ir->t)) {
+      irt_clearmark(ir->t);
+      /* Left PHI gained a spill slot before the loop? */
+      if (ra_hasspill(ir->s)) {
+	IRRef ren;
+	lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
+	ren = tref_ref(lj_ir_emit(as->J));
+	as->ir = as->T->ir;  /* The IR may have been reallocated. */
+	IR(ren)->r = (uint8_t)r;
+	IR(ren)->s = SPS_NONE;
+      }
+    }
+    rset_clear(work, r);
+  }
+}
+
+/* Setup right PHI reference. */
+static void asm_phi(ASMState *as, IRIns *ir)
+{
+  RegSet allow = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) &
+		 ~as->phiset;
+  RegSet afree = (as->freeset & allow);
+  IRIns *irl = IR(ir->op1);
+  IRIns *irr = IR(ir->op2);
+  if (ir->r == RID_SINK)  /* Sink PHI. */
+    return;
+  /* Spill slot shuffling is not implemented yet (but rarely needed). */
+  if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
+    lj_trace_err(as->J, LJ_TRERR_NYIPHI);
+  /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
+  if ((afree & (afree-1))) {  /* Two or more free registers? */
+    Reg r;
+    if (ra_noreg(irr->r)) {  /* Get a register for the right PHI. */
+      r = ra_allocref(as, ir->op2, allow);
+    } else {  /* Duplicate right PHI, need a copy (rare). */
+      r = ra_scratch(as, allow);
+      emit_movrr(as, irr, r, irr->r);
+    }
+    ir->r = (uint8_t)r;
+    rset_set(as->phiset, r);
+    as->phireg[r] = (IRRef1)ir->op1;
+    irt_setmark(irl->t);  /* Marks left PHIs _with_ register. */
+    if (ra_noreg(irl->r))
+      ra_sethint(irl->r, r); /* Set register hint for left PHI. */
+  } else {  /* Otherwise allocate a spill slot. */
+    /* This is overly restrictive, but it triggers only on synthetic code. */
+    if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
+      lj_trace_err(as->J, LJ_TRERR_NYIPHI);
+    ra_spill(as, ir);
+    irr->s = ir->s;  /* Set right PHI spill slot. Sync left slot later. */
+  }
+}
+
+static void asm_loop_fixup(ASMState *as);
+
+/* Middle part of a loop. */
+static void asm_loop(ASMState *as)
+{
+  MCode *mcspill;
+  /* LOOP is a guard, so the snapno is up to date. */
+  as->loopsnapno = as->snapno;
+  if (as->gcsteps)
+    asm_gc_check(as);
+  /* LOOP marks the transition from the variant to the invariant part. */
+  as->flagmcp = as->invmcp = NULL;
+  as->sectref = 0;
+  if (!neverfuse(as)) as->fuseref = 0;
+  asm_phi_shuffle(as);
+  mcspill = as->mcp;
+  asm_phi_copyspill(as);
+  asm_loop_fixup(as);
+  as->mcloop = as->mcp;
+  RA_DBGX((as, "===== LOOP ====="));
+  if (!as->realign) RA_DBG_FLUSH();
+  if (as->mcp != mcspill)
+    emit_jmp(as, mcspill);
+}
+
+/* -- Target-specific assembler ------------------------------------------- */
+
+#if LJ_TARGET_X86ORX64
+#include "lj_asm_x86.h"
+#elif LJ_TARGET_ARM
+#include "lj_asm_arm.h"
+#elif LJ_TARGET_PPC
+#include "lj_asm_ppc.h"
+#elif LJ_TARGET_MIPS
+#include "lj_asm_mips.h"
+#else
+#error "Missing assembler for target CPU"
+#endif
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Head of a root trace. */
+static void asm_head_root(ASMState *as)
+{
+  int32_t spadj;
+  asm_head_root_base(as);
+  emit_setvmstate(as, (int32_t)as->T->traceno);
+  spadj = asm_stack_adjust(as);
+  as->T->spadjust = (uint16_t)spadj;
+  emit_spsub(as, spadj);
+  /* Root traces assume a checked stack for the starting proto. */
+  as->T->topslot = gcref(as->T->startpt)->pt.framesize;
+}
+
+/* Head of a side trace.
+**
+** The current simplistic algorithm requires that all slots inherited
+** from the parent are live in a register between pass 2 and pass 3. This
+** avoids the complexity of stack slot shuffling. But of course this may
+** overflow the register set in some cases and cause the dreaded error:
+** "NYI: register coalescing too complex". A refined algorithm is needed.
+*/
+static void asm_head_side(ASMState *as)
+{
+  IRRef1 sloadins[RID_MAX];
+  RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
+  RegSet live = RSET_EMPTY;  /* Live parent registers. */
+  IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
+  int32_t spadj, spdelta;
+  int pass2 = 0;
+  int pass3 = 0;
+  IRRef i;
+
+  if (as->snapno && as->topslot > as->parent->topslot) {
+    /* Force snap #0 alloc to prevent register overwrite in stack check. */
+    as->snapno = 0;
+    asm_snap_alloc(as);
+  }
+  allow = asm_head_side_base(as, irp, allow);
+
+  /* Scan all parent SLOADs and collect register dependencies. */
+  for (i = as->stopins; i > REF_BASE; i--) {
+    IRIns *ir = IR(i);
+    RegSP rs;
+    lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+	       (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
+    rs = as->parentmap[i - REF_FIRST];
+    if (ra_hasreg(ir->r)) {
+      rset_clear(allow, ir->r);
+      if (ra_hasspill(ir->s)) {
+	ra_save(as, ir, ir->r);
+	checkmclim(as);
+      }
+    } else if (ra_hasspill(ir->s)) {
+      irt_setmark(ir->t);
+      pass2 = 1;
+    }
+    if (ir->r == rs) {  /* Coalesce matching registers right now. */
+      ra_free(as, ir->r);
+    } else if (ra_hasspill(regsp_spill(rs))) {
+      if (ra_hasreg(ir->r))
+	pass3 = 1;
+    } else if (ra_used(ir)) {
+      sloadins[rs] = (IRRef1)i;
+      rset_set(live, rs);  /* Block live parent register. */
+    }
+  }
+
+  /* Calculate stack frame adjustment. */
+  spadj = asm_stack_adjust(as);
+  spdelta = spadj - (int32_t)as->parent->spadjust;
+  if (spdelta < 0) {  /* Don't shrink the stack frame. */
+    spadj = (int32_t)as->parent->spadjust;
+    spdelta = 0;
+  }
+  as->T->spadjust = (uint16_t)spadj;
+
+  /* Reload spilled target registers. */
+  if (pass2) {
+    for (i = as->stopins; i > REF_BASE; i--) {
+      IRIns *ir = IR(i);
+      if (irt_ismarked(ir->t)) {
+	RegSet mask;
+	Reg r;
+	RegSP rs;
+	irt_clearmark(ir->t);
+	rs = as->parentmap[i - REF_FIRST];
+	if (!ra_hasspill(regsp_spill(rs)))
+	  ra_sethint(ir->r, rs);  /* Hint may be gone, set it again. */
+	else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
+	  continue;  /* Same spill slot, do nothing. */
+	mask = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
+	if (mask == RSET_EMPTY)
+	  lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+	r = ra_allocref(as, i, mask);
+	ra_save(as, ir, r);
+	rset_clear(allow, r);
+	if (r == rs) {  /* Coalesce matching registers right now. */
+	  ra_free(as, r);
+	  rset_clear(live, r);
+	} else if (ra_hasspill(regsp_spill(rs))) {
+	  pass3 = 1;
+	}
+	checkmclim(as);
+      }
+    }
+  }
+
+  /* Store trace number and adjust stack frame relative to the parent. */
+  emit_setvmstate(as, (int32_t)as->T->traceno);
+  emit_spsub(as, spdelta);
+
+#if !LJ_TARGET_X86ORX64
+  /* Restore BASE register from parent spill slot. */
+  if (ra_hasspill(irp->s))
+    emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s));
+#endif
+
+  /* Restore target registers from parent spill slots. */
+  if (pass3) {
+    RegSet work = ~as->freeset & RSET_ALL;
+    while (work) {
+      Reg r = rset_pickbot(work);
+      IRRef ref = regcost_ref(as->cost[r]);
+      RegSP rs = as->parentmap[ref - REF_FIRST];
+      rset_clear(work, r);
+      if (ra_hasspill(regsp_spill(rs))) {
+	int32_t ofs = sps_scale(regsp_spill(rs));
+	ra_free(as, r);
+	emit_spload(as, IR(ref), r, ofs);
+	checkmclim(as);
+      }
+    }
+  }
+
+  /* Shuffle registers to match up target regs with parent regs. */
+  for (;;) {
+    RegSet work;
+
+    /* Repeatedly coalesce free live registers by moving to their target. */
+    while ((work = as->freeset & live) != RSET_EMPTY) {
+      Reg rp = rset_pickbot(work);
+      IRIns *ir = IR(sloadins[rp]);
+      rset_clear(live, rp);
+      rset_clear(allow, rp);
+      ra_free(as, ir->r);
+      emit_movrr(as, ir, ir->r, rp);
+      checkmclim(as);
+    }
+
+    /* We're done if no live registers remain. */
+    if (live == RSET_EMPTY)
+      break;
+
+    /* Break cycles by renaming one target to a temp. register. */
+    if (live & RSET_GPR) {
+      RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
+      if (tmpset == RSET_EMPTY)
+	lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+      ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
+    }
+    if (!LJ_SOFTFP && (live & RSET_FPR)) {
+      RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
+      if (tmpset == RSET_EMPTY)
+	lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+      ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
+    }
+    checkmclim(as);
+    /* Continue with coalescing to fix up the broken cycle(s). */
+  }
+
+  /* Inherit top stack slot already checked by parent trace. */
+  as->T->topslot = as->parent->topslot;
+  if (as->topslot > as->T->topslot) {  /* Need to check for higher slot? */
+#ifdef EXITSTATE_CHECKEXIT
+    /* Highest exit + 1 indicates stack check. */
+    ExitNo exitno = as->T->nsnap;
+#else
+    /* Reuse the parent exit in the context of the parent trace. */
+    ExitNo exitno = as->J->exitno;
+#endif
+    as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
+    asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno);
+  }
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Get base slot for a snapshot. */
+static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  MSize n;
+  for (n = snap->nent; n > 0; n--) {
+    SnapEntry sn = map[n-1];
+    if ((sn & SNAP_FRAME)) {
+      *gotframe = 1;
+      return snap_slot(sn);
+    }
+  }
+  return 0;
+}
+
+/* Link to another trace. */
+static void asm_tail_link(ASMState *as)
+{
+  SnapNo snapno = as->T->nsnap-1;  /* Last snapshot. */
+  SnapShot *snap = &as->T->snap[snapno];
+  int gotframe = 0;
+  BCReg baseslot = asm_baseslot(as, snap, &gotframe);
+
+  as->topslot = snap->topslot;
+  checkmclim(as);
+  ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
+
+  if (as->T->link == 0) {
+    /* Setup fixed registers for exit to interpreter. */
+    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    int32_t mres;
+    if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
+      BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
+      if (bc_isret(bc_op(*retpc)))
+	pc = retpc;
+    }
+    ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
+    ra_allockreg(as, i32ptr(pc), RID_LPC);
+    mres = (int32_t)(snap->nslots - baseslot);
+    switch (bc_op(*pc)) {
+    case BC_CALLM: case BC_CALLMT:
+      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+    case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
+    case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
+    default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
+    }
+    ra_allockreg(as, mres, RID_RET);  /* Return MULTRES or 0. */
+  } else if (baseslot) {
+    /* Save modified BASE for linking to trace with higher start frame. */
+    emit_setgl(as, RID_BASE, jit_base);
+  }
+  emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
+
+  /* Sync the interpreter state with the on-trace state. */
+  asm_stack_restore(as, snap);
+
+  /* Root traces that add frames need to check the stack at the end. */
+  if (!as->parent && gotframe)
+    asm_stack_check(as, as->topslot, NULL, as->freeset & RSET_GPR, snapno);
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Clear reg/sp for all instructions and add register hints. */
+static void asm_setup_regsp(ASMState *as)
+{
+  GCtrace *T = as->T;
+  int sink = T->sinktags;
+  IRRef nins = T->nins;
+  IRIns *ir, *lastir;
+  int inloop;
+#if LJ_TARGET_ARM
+  uint32_t rload = 0xa6402a64;
+#endif
+
+  ra_setup(as);
+
+  /* Clear reg/sp for constants. */
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+    ir->prev = REGSP_INIT;
+
+  /* REF_BASE is used for implicit references to the BASE register. */
+  lastir->prev = REGSP_HINT(RID_BASE);
+
+  ir = IR(nins-1);
+  if (ir->o == IR_RENAME) {
+    do { ir--; nins--; } while (ir->o == IR_RENAME);
+    T->nins = nins;  /* Remove any renames left over from ASM restart. */
+  }
+  as->snaprename = nins;
+  as->snapref = nins;
+  as->snapno = T->nsnap;
+
+  as->stopins = REF_BASE;
+  as->orignins = nins;
+  as->curins = nins;
+
+  /* Setup register hints for parent link instructions. */
+  ir = IR(REF_FIRST);
+  if (as->parent) {
+    uint16_t *p;
+    lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
+    if (lastir - ir > LJ_MAX_JSLOTS)
+      lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+    as->stopins = (IRRef)((lastir-1) - as->ir);
+    for (p = as->parentmap; ir < lastir; ir++) {
+      RegSP rs = ir->prev;
+      *p++ = (uint16_t)rs;  /* Copy original parent RegSP to parentmap. */
+      if (!ra_hasspill(regsp_spill(rs)))
+	ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
+      else
+	ir->prev = REGSP_INIT;
+    }
+  }
+
+  inloop = 0;
+  as->evenspill = SPS_FIRST;
+  for (lastir = IR(nins); ir < lastir; ir++) {
+    if (sink) {
+      if (ir->r == RID_SINK)
+	continue;
+      if (ir->r == RID_SUNK) {  /* Revert after ASM restart. */
+	ir->r = RID_SINK;
+	continue;
+      }
+    }
+    switch (ir->o) {
+    case IR_LOOP:
+      inloop = 1;
+      break;
+#if LJ_TARGET_ARM
+    case IR_SLOAD:
+      if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP))
+	break;
+      /* fallthrough */
+    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+      if (!LJ_SOFTFP && irt_isnum(ir->t)) break;
+      ir->prev = (uint16_t)REGSP_HINT((rload & 15));
+      rload = lj_ror(rload, 4);
+      continue;
+#endif
+    case IR_CALLXS: {
+      CCallInfo ci;
+      ci.flags = asm_callx_flags(as, ir);
+      ir->prev = asm_setup_call_slots(as, ir, &ci);
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+      continue;
+      }
+    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+      ir->prev = asm_setup_call_slots(as, ir, ci);
+      if (inloop)
+	as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
+		      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+      continue;
+      }
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
+    case IR_HIOP:
+      switch ((ir-1)->o) {
+#if LJ_SOFTFP && LJ_TARGET_ARM
+      case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+	if (ra_hashint((ir-1)->r)) {
+	  ir->prev = (ir-1)->prev + 1;
+	  continue;
+	}
+	break;
+#endif
+#if !LJ_SOFTFP && LJ_NEED_FP64
+      case IR_CONV:
+	if (irt_isfp((ir-1)->t)) {
+	  ir->prev = REGSP_HINT(RID_FPRET);
+	  continue;
+	}
+	/* fallthrough */
+#endif
+      case IR_CALLN: case IR_CALLXS:
+#if LJ_SOFTFP
+      case IR_MIN: case IR_MAX:
+#endif
+	(ir-1)->prev = REGSP_HINT(RID_RETLO);
+	ir->prev = REGSP_HINT(RID_RETHI);
+	continue;
+      default:
+	break;
+      }
+      break;
+#endif
+#if LJ_SOFTFP
+    case IR_MIN: case IR_MAX:
+      if ((ir+1)->o != IR_HIOP) break;
+      /* fallthrough */
+#endif
+    /* C calls evict all scratch regs and return results in RID_RET. */
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+      if (REGARG_NUMGPR < 3 && as->evenspill < 3)
+	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
+    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+      ir->prev = REGSP_HINT(RID_RET);
+      if (inloop)
+	as->modset = RSET_SCRATCH;
+      continue;
+    case IR_STRTO: case IR_OBAR:
+      if (inloop)
+	as->modset = RSET_SCRATCH;
+      break;
+#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
+    case IR_ATAN2: case IR_LDEXP:
+#endif
+    case IR_POW:
+      if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+#if LJ_TARGET_X86ORX64
+	ir->prev = REGSP_HINT(RID_XMM0);
+	if (inloop)
+	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+#else
+	ir->prev = REGSP_HINT(RID_FPRET);
+	if (inloop)
+	  as->modset |= RSET_SCRATCH;
+#endif
+	continue;
+      }
+      /* fallthrough for integer POW */
+    case IR_DIV: case IR_MOD:
+      if (!irt_isnum(ir->t)) {
+	ir->prev = REGSP_HINT(RID_RET);
+	if (inloop)
+	  as->modset |= (RSET_SCRATCH & RSET_GPR);
+	continue;
+      }
+      break;
+    case IR_FPMATH:
+#if LJ_TARGET_X86ORX64
+      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
+	ir->prev = REGSP_HINT(RID_XMM0);
+#if !LJ_64
+	if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+	  as->evenspill = 4;
+#endif
+	if (inloop)
+	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
+	continue;
+      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
+	ir->prev = REGSP_HINT(RID_XMM0);
+	if (inloop)
+	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	continue;
+      }
+      break;
+#else
+      ir->prev = REGSP_HINT(RID_FPRET);
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+      continue;
+#endif
+#if LJ_TARGET_X86ORX64
+    /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
+    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+      if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
+	IR(ir->op2)->r = REGSP_HINT(RID_ECX);
+	if (inloop)
+	  rset_set(as->modset, RID_ECX);
+      }
+      break;
+#endif
+    /* Do not propagate hints across type conversions or loads. */
+    case IR_TOBIT:
+    case IR_XLOAD:
+#if !LJ_TARGET_ARM
+    case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+#endif
+      break;
+    case IR_CONV:
+      if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM ||
+	  (ir->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
+	break;
+      /* fallthrough */
+    default:
+      /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
+      if (irref_isk(ir->op2) && !irref_isk(ir->op1) &&
+	  ra_hashint(regsp_reg(IR(ir->op1)->prev))) {
+	ir->prev = IR(ir->op1)->prev;
+	continue;
+      }
+      break;
+    }
+    ir->prev = REGSP_INIT;
+  }
+  if ((as->evenspill & 1))
+    as->oddspill = as->evenspill++;
+  else
+    as->oddspill = 0;
+}
+
+/* -- Assembler core ------------------------------------------------------ */
+
+/* Assemble a trace. */
+void lj_asm_trace(jit_State *J, GCtrace *T)
+{
+  ASMState as_;
+  ASMState *as = &as_;
+  MCode *origtop;
+
+  /* Ensure an initialized instruction beyond the last one for HIOP checks. */
+  J->cur.nins = lj_ir_nextins(J);
+  J->cur.ir[J->cur.nins].o = IR_NOP;
+
+  /* Setup initial state. Copy some fields to reduce indirections. */
+  as->J = J;
+  as->T = T;
+  as->ir = T->ir;
+  as->flags = J->flags;
+  as->loopref = J->loopref;
+  as->realign = NULL;
+  as->loopinv = 0;
+  as->parent = J->parent ? traceref(J, J->parent) : NULL;
+
+  /* Reserve MCode memory. */
+  as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
+  as->mcp = as->mctop;
+  as->mclim = as->mcbot + MCLIM_REDZONE;
+  asm_setup_target(as);
+
+  do {
+    as->mcp = as->mctop;
+#ifdef LUA_USE_ASSERT
+    as->mcp_prev = as->mcp;
+#endif
+    as->curins = T->nins;
+    RA_DBG_START();
+    RA_DBGX((as, "===== STOP ====="));
+
+    /* General trace setup. Emit tail of trace. */
+    asm_tail_prep(as);
+    as->mcloop = NULL;
+    as->flagmcp = NULL;
+    as->topslot = 0;
+    as->gcsteps = 0;
+    as->sectref = as->loopref;
+    as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
+    asm_setup_regsp(as);
+    if (!as->loopref)
+      asm_tail_link(as);
+
+    /* Assemble a trace in linear backwards order. */
+    for (as->curins--; as->curins > as->stopins; as->curins--) {
+      IRIns *ir = IR(as->curins);
+      lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
+      if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
+	continue;  /* Dead-code elimination can be soooo easy. */
+      if (irt_isguard(ir->t))
+	asm_snap_prep(as);
+      RA_DBG_REF();
+      checkmclim(as);
+      asm_ir(as, ir);
+    }
+  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
+
+  /* Emit head of trace. */
+  RA_DBG_REF();
+  checkmclim(as);
+  if (as->gcsteps > 0) {
+    as->curins = as->T->snap[0].ref;
+    asm_snap_prep(as);  /* The GC check is a guard. */
+    asm_gc_check(as);
+  }
+  ra_evictk(as);
+  if (as->parent)
+    asm_head_side(as);
+  else
+    asm_head_root(as);
+  asm_phi_fixup(as);
+
+  RA_DBGX((as, "===== START ===="));
+  RA_DBG_FLUSH();
+  if (as->freeset != RSET_ALL)
+    lj_trace_err(as->J, LJ_TRERR_BADRA);  /* Ouch! Should never happen. */
+
+  /* Set trace entry point before fixing up tail to allow link to self. */
+  T->mcode = as->mcp;
+  T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
+  if (!as->loopref)
+    asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
+  T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+  lj_mcode_sync(T->mcode, origtop);
+}
+
+#undef IR
+
+#endif

+ 17 - 0
luajit.mod/luajit/src/lj_asm.h

@@ -0,0 +1,17 @@
+/*
+** IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_ASM_H
+#define _LJ_ASM_H
+
+#include "lj_jit.h"
+
+#if LJ_HASJIT
+LJ_FUNC void lj_asm_trace(jit_State *J, GCtrace *T);
+LJ_FUNC void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
+			      MCode *target);
+#endif
+
+#endif

+ 2361 - 0
luajit.mod/luajit/src/lj_asm_arm.h

@@ -0,0 +1,2361 @@
+/*
+** ARM IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate a scratch register pair. */
+static Reg ra_scratchpair(ASMState *as, RegSet allow)
+{
+  RegSet pick1 = as->freeset & allow;
+  RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN;
+  Reg r;
+  if (pick2) {
+    r = rset_picktop(pick2);
+  } else {
+    RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN;
+    if (pick) {
+      r = rset_picktop(pick);
+      ra_restore(as, regcost_ref(as->cost[r+1]));
+    } else {
+      pick = pick1 & (allow << 1) & RSET_GPRODD;
+      if (pick) {
+	r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick)-1]));
+      } else {
+	r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN);
+	ra_restore(as, regcost_ref(as->cost[r+1]));
+      }
+    }
+  }
+  lua_assert(rset_test(RSET_GPREVEN, r));
+  ra_modified(as, r);
+  ra_modified(as, r+1);
+  RA_DBGX((as, "scratchpair    $r $r", r, r+1));
+  return r;
+}
+
+#if !LJ_SOFTFP
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {
+    ra_noweak(as, right);
+    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {
+    right = ra_allocref(as, ir->op2, allow);
+    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_allocref(as, ir->op1, allow);
+    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);
+}
+#endif
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Generate an exit stub group at the bottom of the reserved MCode memory. */
+static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
+{
+  MCode *mxp = as->mcbot;
+  int i;
+  if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
+    asm_mclimit(as);
+  /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
+  *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP);
+  *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu);
+  mxp++;
+  *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch);  /* DISPATCH address */
+  *mxp++ = group*EXITSTUBS_PER_GROUP;
+  for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
+    *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
+  lj_mcode_sync(as->mcbot, mxp);
+  lj_mcode_commitbot(as->J, mxp);
+  as->mcbot = mxp;
+  as->mclim = as->mcbot + MCLIM_REDZONE;
+  return mxp - EXITSTUBS_PER_GROUP;
+}
+
+/* Setup all needed exit stubs. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
+    lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+  for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
+    if (as->J->exitstubgroup[i] == NULL)
+      as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
+}
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guardcc(ASMState *as, ARMCC cc)
+{
+  MCode *target = exitstub_addr(as->J, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = ARMI_BL | ((target-p-2) & 0x00ffffffu);
+    emit_branch(as, ARMF_CC(ARMI_B, cc^1), p+1);
+    return;
+  }
+  emit_branch(as, ARMF_CC(ARMI_BL, cc), target);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref)
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;
+}
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+			  int lim)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;
+	  ofs += 8*IR(ir->op2)->i;
+	  if (ofs > -lim && ofs < lim) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (ofs < lim) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
+	*ofsp = (ofs & 255);  /* Mask out less bits to allow LDRD. */
+	return ra_allock(as, (ofs & ~255), allow);
+      }
+    }
+  }
+  *ofsp = 0;
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse m operand into arithmetic/logic instructions. */
+static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_hasreg(ir->r)) {
+    ra_noweak(as, ir->r);
+    return ARMF_M(ir->r);
+  } else if (irref_isk(ref)) {
+    uint32_t k = emit_isk12(ai, ir->i);
+    if (k)
+      return k;
+  } else if (mayfuse(as, ref)) {
+    if (ir->o >= IR_BSHL && ir->o <= IR_BROR) {
+      Reg m = ra_alloc1(as, ir->op1, allow);
+      ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL :
+		    ir->o == IR_BSHR ? ARMSH_LSR :
+		    ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR;
+      if (irref_isk(ir->op2)) {
+	return m | ARMF_SH(sh, (IR(ir->op2)->i & 31));
+      } else {
+	Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m));
+	return m | ARMF_RSH(sh, s);
+      }
+    } else if (ir->o == IR_ADD && ir->op1 == ir->op2) {
+      Reg m = ra_alloc1(as, ir->op1, allow);
+      return m | ARMF_SH(ARMSH_LSL, 1);
+    }
+  }
+  return ra_allocref(as, ref, allow);
+}
+
+/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
+static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
+      irref_isk(ir->op2) && IR(ir->op2)->i == 2)
+    return ir->op1;
+  return 0;  /* No fusion. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
+			 RegSet allow, int32_t ofs)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 :
+		   (ai & 0x04000000) ? 4096 : 256;
+    if (ir->o == IR_ADD) {
+      int32_t ofs2;
+      if (irref_isk(ir->op2) &&
+	  (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim &&
+	  (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) {
+	ofs = ofs2;
+	ref = ir->op1;
+      } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) {
+	IRRef lref = ir->op1, rref = ir->op2;
+	Reg rn, rm;
+	if ((ai & 0x04000000)) {
+	  IRRef sref = asm_fuselsl2(as, rref);
+	  if (sref) {
+	    rref = sref;
+	    ai |= ARMF_SH(ARMSH_LSL, 2);
+	  } else if ((sref = asm_fuselsl2(as, lref)) != 0) {
+	    lref = rref;
+	    rref = sref;
+	    ai |= ARMF_SH(ARMSH_LSL, 2);
+	  }
+	}
+	rn = ra_alloc1(as, lref, allow);
+	rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
+	if ((ai & 0x04000000)) ai |= ARMI_LS_R;
+	emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
+	return;
+      }
+    } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
+      lua_assert(ofs == 0);
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs += IR(ir->op2)->i;
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs += IR(ir->op1)->i;
+	ref = ir->op2;
+      } else {
+	/* NYI: Fuse ADD with constant. */
+	Reg rn = ra_alloc1(as, ir->op1, allow);
+	uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
+	if ((ai & 0x04000000))
+	  emit_lso(as, ai, rd, rd, ofs);
+	else
+	  emit_lsox(as, ai, rd, rd, ofs);
+	emit_dn(as, ARMI_ADD^m, rd, rn);
+	return;
+      }
+      if (ofs <= -lim || ofs >= lim) {
+	Reg rn = ra_alloc1(as, ref, allow);
+	Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+	if ((ai & 0x04000000)) ai |= ARMI_LS_R;
+	emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
+	return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+#if !LJ_SOFTFP
+  if ((ai & 0x08000000))
+    emit_vlso(as, ai, rd, base, ofs);
+  else
+#endif
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, rd, base, ofs);
+  else
+    emit_lsox(as, ai, rd, base, ofs);
+}
+
+#if !LJ_SOFTFP
+/* Fuse to multiply-add/sub instruction. */
+static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+	ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+	(rref = lref, ai = air, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+    Reg right, left = ra_alloc2(as, irm,
+			rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    right = (left >> 8); left &= 255;
+    emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15));
+    if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15));
+    return 1;
+  }
+  return 0;
+}
+#endif
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_NARGS(ci);
+  int32_t ofs = 0;
+#if LJ_SOFTFP
+  Reg gpr = REGARG_FIRSTGPR;
+#else
+  Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0;
+#endif
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+#if !LJ_SOFTFP
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+#endif
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
+    if (ref && irt_isfp(ir->t)) {
+      RegSet of = as->freeset;
+      Reg src;
+      if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
+	if (irt_isnum(ir->t)) {
+	  if (fpr <= REGARG_LASTFPR) {
+	    ra_leftov(as, fpr, ref);
+	    fpr++;
+	    continue;
+	  }
+	} else if (fprodd) {  /* Ick. */
+	  src = ra_alloc1(as, ref, RSET_FPR);
+	  emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000);
+	  fprodd = 0;
+	  continue;
+	} else if (fpr <= REGARG_LASTFPR) {
+	  ra_leftov(as, fpr, ref);
+	  fprodd = fpr++;
+	  continue;
+	}
+	/* Workaround to protect argument GPRs from being used for remat. */
+	as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
+	src = ra_alloc1(as, ref, RSET_FPR);  /* May alloc GPR to remat FPR. */
+	as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
+	fprodd = 0;
+	goto stackfp;
+      }
+      /* Workaround to protect argument GPRs from being used for remat. */
+      as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
+      src = ra_alloc1(as, ref, RSET_FPR);  /* May alloc GPR to remat FPR. */
+      as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
+      if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
+      if (gpr <= REGARG_LASTGPR) {
+	lua_assert(rset_test(as->freeset, gpr));  /* Must have been evicted. */
+	if (irt_isnum(ir->t)) {
+	  lua_assert(rset_test(as->freeset, gpr+1));  /* Ditto. */
+	  emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
+	  gpr += 2;
+	} else {
+	  emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15));
+	  gpr++;
+	}
+      } else {
+      stackfp:
+	if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
+	emit_spstore(as, ir, src, ofs);
+	ofs += irt_isnum(ir->t) ? 8 : 4;
+      }
+    } else
+#endif
+    {
+      if (gpr <= REGARG_LASTGPR) {
+	lua_assert(rset_test(as->freeset, gpr));  /* Must have been evicted. */
+	if (ref) ra_leftov(as, gpr, ref);
+	gpr++;
+      } else {
+	if (ref) {
+	  Reg r = ra_alloc1(as, ref, RSET_GPR);
+	  emit_spstore(as, ir, r, ofs);
+	}
+	ofs += 4;
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP);
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lua_assert(!irt_ispri(ir->t));
+    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+      if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
+	Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+	if (irt_isnum(ir->t))
+	  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest);
+	else
+	  emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+  UNUSED(ci);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)(irf->i);
+  } else {  /* Need a non-argument register for indirect calls. */
+    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1));
+    emit_m(as, ARMI_BLXr, freg);
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  /* Need to force a spill on REF_BASE now to update the stack slot. */
+  emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, ARMI_CMP, RID_TMP,
+	  ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+#if !LJ_SOFTFP
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guardcc(as, CC_NE);
+  emit_d(as, ARMI_VMRS, 0);
+  emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15));
+  emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15));
+  emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+  emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15));
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_FPR;
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+  emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
+}
+#endif
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
+  IRRef lref = ir->op1;
+  /* 64 bit integer conversions are handled by SPLIT. */
+  lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
+#if LJ_SOFTFP
+  /* FP conversions are handled by SPLIT. */
+  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+  lua_assert(irt_type(ir->t) != st);
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32,
+	      (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15));
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      ARMIns ai = irt_isfloat(ir->t) ?
+	(st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) :
+	(st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32);
+      emit_dm(as, ai, (dest & 15), (dest & 15));
+      emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15));
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      ARMIns ai;
+      emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+      ai = irt_isint(ir->t) ?
+	(st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
+	(st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
+      emit_dm(as, ai, (tmp & 15), (left & 15));
+    }
+  } else
+#endif
+  {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+      if ((as->flags & JIT_F_ARMV6)) {
+	ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
+		    st == IRT_U8 ? ARMI_UXTB :
+		    st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH;
+	emit_dm(as, ai, dest, left);
+      } else if (st == IRT_U8) {
+	emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left);
+      } else {
+	uint32_t shift = st == IRT_I8 ? 24 : 16;
+	ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR;
+	emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP);
+	emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left);
+      }
+    } else {  /* Handle 32/32 bit no-op (cast). */
+      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    }
+  }
+}
+
+#if !LJ_SOFTFP && LJ_HASFFI
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  CCallInfo ci;
+  IRRef args[2];
+  args[0] = (ir-1)->op1;
+  args[1] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  ci = lj_ir_callinfo[id];
+#if !LJ_ABI_SOFTFP
+  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#endif
+  asm_setupresult(as, ir, &ci);
+  asm_gencall(as, &ci, args);
+}
+#endif
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  Reg rlo = 0, rhi = 0, tmp;
+  int destused = ra_used(ir);
+  int32_t ofs = 0;
+  ra_evictset(as, RSET_SCRATCH);
+#if LJ_SOFTFP
+  if (destused) {
+    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+	(ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) {
+      int i;
+      for (i = 0; i < 2; i++) {
+	Reg r = (ir+i)->r;
+	if (ra_hasreg(r)) {
+	  ra_free(as, r);
+	  ra_modified(as, r);
+	  emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+	}
+      }
+      ofs = sps_scale(ir->s);
+      destused = 0;
+    } else {
+      rhi = ra_dest(as, ir+1, RSET_GPR);
+      rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+    }
+  }
+  asm_guardcc(as, CC_EQ);
+  if (destused) {
+    emit_lso(as, ARMI_LDR, rhi, RID_SP, 4);
+    emit_lso(as, ARMI_LDR, rlo, RID_SP, 0);
+  }
+#else
+  UNUSED(rhi);
+  if (destused) {
+    if (ra_hasspill(ir->s)) {
+      ofs = sps_scale(ir->s);
+      destused = 0;
+      if (ra_hasreg(ir->r)) {
+	ra_free(as, ir->r);
+	ra_modified(as, ir->r);
+	emit_spload(as, ir, ir->r, ofs);
+      }
+    } else {
+      rlo = ra_dest(as, ir, RSET_FPR);
+    }
+  }
+  asm_guardcc(as, CC_EQ);
+  if (destused)
+    emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0);
+#endif
+  emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);  /* Test return status. */
+  args[0] = ir->op1;      /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n  */
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  if (ofs == 0)
+    emit_dm(as, ARMI_MOV, tmp, RID_SP);
+  else
+    emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
+}
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    if (irref_isk(ref)) {
+      /* Use the number constant itself as a TValue. */
+      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+    } else {
+#if LJ_SOFTFP
+      lua_assert(0);
+#else
+      /* Otherwise force a spill and use the spill slot. */
+      emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+#endif
+    }
+  } else {
+    /* Otherwise use [sp] and [sp+4] to hold the TValue. */
+    RegSet allow = rset_exclude(RSET_GPR, dest);
+    Reg type;
+    emit_dm(as, ARMI_MOV, dest, RID_SP);
+    if (!irt_ispri(ir->t)) {
+      Reg src = ra_alloc1(as, ref, allow);
+      emit_lso(as, ARMI_STR, src, RID_SP, 0);
+    }
+    if ((ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, ref+1, allow);
+    else
+      type = ra_allock(as, irt_toitype(ir->t), allow);
+    emit_lso(as, ARMI_STR, type, RID_SP, 4);
+  }
+}
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+    args[1] = ASMREF_TMP1;  /* const lua_Number * */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+  } else {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+    args[1] = ir->op1;  /* int32_t k */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+  }
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i);
+    if (k) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_dn(as, ARMI_ADD^k, dest, base);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, base, idx);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  IRType1 kt = irkey->t;
+  int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt));
+  uint32_t khash;
+  MCLabel l_end, l_loop;
+  rset_clear(allow, tab);
+  if (!irref_isk(refkey) || irt_isstr(kt)) {
+#if LJ_SOFTFP
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+    if (irkey[1].o == IR_HIOP) {
+      if (ra_hasreg((irkey+1)->r)) {
+	keynumhi = (irkey+1)->r;
+	keyhi = RID_TMP;
+	ra_noweak(as, keynumhi);
+      } else {
+	keyhi = keynumhi = ra_allocref(as, refkey+1, allow);
+      }
+      rset_clear(allow, keynumhi);
+      khi = 0;
+    }
+#else
+    if (irt_isnum(kt)) {
+      key = ra_scratch(as, allow);
+      rset_clear(allow, key);
+      keyhi = keynumhi = ra_scratch(as, allow);
+      rset_clear(allow, keyhi);
+      khi = 0;
+    } else {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+#endif
+  } else if (irt_isnum(kt)) {
+    int32_t val = (int32_t)ir_knum(irkey)->u32.lo;
+    k = emit_isk12(ARMI_CMP, val);
+    if (!k) {
+      key = ra_allock(as, val, allow);
+      rset_clear(allow, key);
+    }
+    val = (int32_t)ir_knum(irkey)->u32.hi;
+    khi = emit_isk12(ARMI_CMP, val);
+    if (!khi) {
+      keyhi = ra_allock(as, val, allow);
+      rset_clear(allow, keyhi);
+    }
+  } else if (!irt_ispri(kt)) {
+    k = emit_isk12(ARMI_CMP, irkey->i);
+    if (!k) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+  }
+  if (!irt_ispri(kt))
+    tmp = ra_scratchpair(as, allow);
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_AL);
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end. */
+  l_loop = --as->mcp;
+  emit_n(as, ARMI_CMP|ARMI_K12|0, dest);
+  emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next));
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_EQ);
+  else
+    emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+  if (!irt_ispri(kt)) {
+    emit_nm(as, ARMF_CC(ARMI_CMP, CC_EQ)^k, tmp, key);
+    emit_nm(as, ARMI_CMP^khi, tmp+1, keyhi);
+    emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key));
+  } else {
+    emit_n(as, ARMI_CMP^khi, tmp);
+    emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it));
+  }
+  *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
+
+  /* Load main position relative to tab->node into dest. */
+  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  if (khash == 0) {
+    emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+  } else {
+    emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
+    emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
+    if (irt_isstr(kt)) {  /* Fetch of str->hash is cheaper than ra_allock. */
+      emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
+      emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+      emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
+      emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+    } else if (irref_isk(refkey)) {
+      emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
+	       rset_exclude(rset_exclude(RSET_GPR, tab), dest));
+      emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+      emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      if (ra_hasreg(keynumhi)) {  /* Canonicalize +-0.0 to 0.0. */
+	if (keyhi == RID_TMP)
+	  emit_dm(as, ARMF_CC(ARMI_MOV, CC_NE), keyhi, keynumhi);
+	emit_d(as, ARMF_CC(ARMI_MOV, CC_EQ)|ARMI_K12|0, keyhi);
+      }
+      emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP);
+      emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT3), tmp, tmp, tmp+1);
+      emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+      emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 32-((HASH_ROT2+HASH_ROT1)&31)),
+	       tmp, tmp+1, tmp);
+      emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+      emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT1), tmp+1, tmp+1, tmp);
+      if (ra_hasreg(keynumhi)) {
+	emit_dnm(as, ARMI_EOR, tmp+1, tmp, key);
+	emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key);  /* Test for +-0.0. */
+	emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi);
+#if !LJ_SOFTFP
+	emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi,
+		 (ra_alloc1(as, refkey, RSET_FPR) & 15));
+#endif
+      } else {
+	emit_dnm(as, ARMI_EOR, tmp+1, tmp, key);
+	emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS,
+		 rset_exclude(rset_exclude(RSET_GPR, tab), key));
+      }
+    }
+  }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg key = RID_NONE, type = RID_TMP, idx = node;
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  lua_assert(ofs % sizeof(Node) == 0);
+  if (ofs > 4095) {
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_opk(as, ARMI_ADD, dest, node, ofs, allow);
+  }
+  asm_guardcc(as, CC_NE);
+  if (!irt_ispri(irkey->t)) {
+    RegSet even = (as->freeset & allow);
+    even = even & (even >> 1) & RSET_GPREVEN;
+    if (even) {
+      key = ra_scratch(as, even);
+      if (rset_test(as->freeset, key+1)) {
+	type = key+1;
+	ra_modified(as, type);
+      }
+    } else {
+      key = ra_scratch(as, allow);
+    }
+    rset_clear(allow, key);
+  }
+  rset_clear(allow, type);
+  if (irt_isnum(irkey->t)) {
+    emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, type,
+	     (int32_t)ir_knum(irkey)->u32.hi, allow);
+    emit_opk(as, ARMI_CMP, 0, key,
+	     (int32_t)ir_knum(irkey)->u32.lo, allow);
+  } else {
+    if (ra_hasreg(key))
+      emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, key, irkey->i, allow);
+    emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype(irkey->t), type);
+  }
+  emit_lso(as, ARMI_LDR, type, idx, kofs+4);
+  if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs);
+  if (ofs > 4095)
+    emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
+}
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+  /* NYI: Check that UREFO is still open and not aliasing a slot. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, ARMI_LDR, dest, v);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      asm_guardcc(as, CC_NE);
+      emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
+      emit_opk(as, ARMI_ADD, dest, uv,
+	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
+      emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    } else {
+      emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
+    }
+    emit_lso(as, ARMI_LDR, uv, func,
+	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+  UNUSED(as); UNUSED(ir);
+  lua_assert(!ra_used(ir));
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  IRRef ref = ir->op2, refk = ir->op1;
+  Reg r;
+  if (irref_isk(ref)) {
+    IRRef tmp = refk; refk = ref; ref = tmp;
+  } else if (!irref_isk(refk)) {
+    uint32_t k, m = ARMI_K12|sizeof(GCstr);
+    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+    IRIns *irr = IR(ir->op2);
+    if (ra_hasreg(irr->r)) {
+      ra_noweak(as, irr->r);
+      right = irr->r;
+    } else if (mayfuse(as, irr->op2) &&
+	       irr->o == IR_ADD && irref_isk(irr->op2) &&
+	       (k = emit_isk12(ARMI_ADD,
+			       (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) {
+      m = k;
+      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
+    } else {
+      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
+    }
+    emit_dn(as, ARMI_ADD^m, dest, dest);
+    emit_dnm(as, ARMI_ADD, dest, left, right);
+    return;
+  }
+  r = ra_alloc1(as, ref, RSET_GPR);
+  emit_opk(as, ARMI_ADD, dest, r,
+	   sizeof(GCstr) + IR(refk)->i, rset_exclude(RSET_GPR, r));
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static ARMIns asm_fxloadins(IRIns *ir)
+{
+  switch (irt_type(ir->t)) {
+  case IRT_I8: return ARMI_LDRSB;
+  case IRT_U8: return ARMI_LDRB;
+  case IRT_I16: return ARMI_LDRSH;
+  case IRT_U16: return ARMI_LDRH;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;
+  default: return ARMI_LDR;
+  }
+}
+
+static ARMIns asm_fxstoreins(IRIns *ir)
+{
+  switch (irt_type(ir->t)) {
+  case IRT_I8: case IRT_U8: return ARMI_STRB;
+  case IRT_I16: case IRT_U16: return ARMI_STRH;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;
+  default: return ARMI_STR;
+  }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+  ARMIns ai = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op2 == IRFL_TAB_ARRAY) {
+    ofs = asm_fuseabase(as, ir->op1);
+    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+      return;
+    }
+  }
+  ofs = field_ofs[ir->op2];
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, dest, idx, ofs);
+  else
+    emit_lsox(as, ai, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    ARMIns ai = asm_fxstoreins(ir);
+    if ((ai & 0x04000000))
+      emit_lso(as, ai, src, idx, ofs);
+    else
+      emit_lsox(as, ai, src, idx, ofs);
+  }
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir,
+		     (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2,
+			(!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+		 rset_exclude(RSET_GPR, src), ofs);
+  }
+}
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+  IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+  Reg dest = RID_NONE, type = RID_NONE, idx;
+  RegSet allow = RSET_GPR;
+  int32_t ofs = 0;
+  if (hiop && ra_used(ir+1)) {
+    type = ra_dest(as, ir+1, allow);
+    rset_clear(allow, type);
+  }
+  if (ra_used(ir)) {
+    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+	       irt_isint(ir->t) || irt_isaddr(ir->t));
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+    rset_clear(allow, dest);
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
+		       (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+  if (!hiop || type == RID_NONE) {
+    rset_clear(allow, idx);
+    if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+	rset_test((as->freeset & allow), dest+1)) {
+      type = dest+1;
+      ra_modified(as, type);
+    } else {
+      type = RID_TMP;
+    }
+  }
+  asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
+  emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
+  if (ra_hasreg(dest)) {
+#if !LJ_SOFTFP
+    if (t == IRT_NUM)
+      emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs);
+    else
+#endif
+      emit_lso(as, ARMI_LDR, dest, idx, ofs);
+  }
+  emit_lso(as, ARMI_LDR, type, idx, ofs+4);
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    RegSet allow = RSET_GPR;
+    Reg idx, src = RID_NONE, type = RID_NONE;
+    int32_t ofs = 0;
+#if !LJ_SOFTFP
+    if (irt_isnum(ir->t)) {
+      src = ra_alloc1(as, ir->op2, RSET_FPR);
+      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024);
+      emit_vlso(as, ARMI_VSTR_D, src, idx, ofs);
+    } else
+#endif
+    {
+      int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+      if (!irt_ispri(ir->t)) {
+	src = ra_alloc1(as, ir->op2, allow);
+	rset_clear(allow, src);
+      }
+      if (hiop)
+	type = ra_alloc1(as, (ir+1)->op2, allow);
+      else
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096);
+      if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
+      emit_lso(as, ARMI_STR, type, idx, ofs+4);
+    }
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+  IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+  Reg dest = RID_NONE, type = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
+  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+#if LJ_SOFTFP
+  lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
+  if (hiop && ra_used(ir+1)) {
+    type = ra_dest(as, ir+1, allow);
+    rset_clear(allow, type);
+  }
+#else
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t = IRT_NUM;  /* Continue with a regular number type check. */
+  } else
+#endif
+  if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
+    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+	       irt_isint(ir->t) || irt_isaddr(ir->t));
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+    rset_clear(allow, dest);
+    base = ra_alloc1(as, REF_BASE, allow);
+    if ((ir->op2 & IRSLOAD_CONVERT)) {
+      if (t == IRT_INT) {
+	emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+	emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
+	t = IRT_NUM;  /* Check for original type. */
+      } else {
+	emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
+	emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
+	t = IRT_INT;  /* Check for original type. */
+      }
+      dest = tmp;
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+dotypecheck:
+  rset_clear(allow, base);
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    if (ra_noreg(type)) {
+      if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+	  rset_test((as->freeset & allow), dest+1)) {
+	type = dest+1;
+	ra_modified(as, type);
+      } else {
+	type = RID_TMP;
+      }
+    }
+    asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
+    emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
+  }
+  if (ra_hasreg(dest)) {
+#if !LJ_SOFTFP
+    if (t == IRT_NUM) {
+      if (ofs < 1024) {
+	emit_vlso(as, ARMI_VLDR_D, dest, base, ofs);
+      } else {
+	if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4);
+	emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0);
+	emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow);
+	return;
+      }
+    } else
+#endif
+      emit_lso(as, ARMI_LDR, dest, base, ofs);
+  }
+  if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4);
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
+  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
+	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[2];
+  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  RegSet drop = RSET_SCRATCH;
+  lua_assert(sz != CTSIZE_INVALID);
+
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  as->gcsteps++;
+
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  if (ra_used(ir))
+    ra_destreg(as, ir, RID_RET);  /* GCcdata * */
+
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    int32_t ofs = sizeof(GCcdata);
+    lua_assert(sz == 4 || sz == 8);
+    if (sz == 8) {
+      ofs += 4; ir++;
+      lua_assert(ir->o == IR_HIOP);
+    }
+    for (;;) {
+      Reg r = ra_alloc1(as, ir->op2, allow);
+      emit_lso(as, ARMI_STR, r, RID_RET, ofs);
+      rset_clear(allow, r);
+      if (ofs == sizeof(GCcdata)) break;
+      ofs -= 4; ir--;
+    }
+  }
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  {
+    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
+    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
+    emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
+    emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
+    if (k) emit_d(as, ARMI_MOV^k, RID_R1);
+  }
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+	       ra_releasetmp(as, ASMREF_TMP1));
+}
+#else
+#define asm_cnew(as, ir)	((void)0)
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg gr = ra_allock(as, i32ptr(J2G(as->J)),
+		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
+  Reg mark = RID_TMP;
+  MCLabel l_end = emit_label(as);
+  emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist));
+  emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  emit_lso(as, ARMI_STR, tab, gr,
+	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_dn(as, ARMI_BIC|ARMI_K12|LJ_GC_BLACK, mark, mark);
+  emit_lso(as, ARMI_LDR, link, gr,
+	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+  emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_BLACK, mark);
+  emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  if ((l_end[-1] >> 28) == CC_AL)
+    l_end[-1] = ARMF_CC(l_end[-1], CC_NE);
+  else
+    emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+  ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1));
+  obj = IR(ir->op1)->r;
+  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+  emit_n(as, ARMF_CC(ARMI_TST, CC_NE)|ARMI_K12|LJ_GC_BLACK, tmp);
+  emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_WHITES, RID_TMP);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  emit_lso(as, ARMI_LDRB, tmp, obj,
+	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+  emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+#if !LJ_SOFTFP
+static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = (left >> 8); left &= 255;
+  emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15));
+}
+
+static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+  emit_dm(as, ai, (dest & 15), (left & 15));
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+      IRRef args[2];
+      args[0] = irpp->op1;
+      args[1] = irp->op2;
+      asm_setupresult(as, ir, ci);
+      asm_gencall(as, ci, args);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
+static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
+{
+  IRIns *ir;
+  if (irref_isk(rref))
+    return 0;  /* Don't swap constants to the left. */
+  if (irref_isk(lref))
+    return 1;  /* But swap constants to the right. */
+  ir = IR(rref);
+  if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
+      (ir->o == IR_ADD && ir->op1 == ir->op2))
+    return 0;  /* Don't swap fusable operands to the left. */
+  ir = IR(lref);
+  if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
+      (ir->o == IR_ADD && ir->op1 == ir->op2))
+    return 1;  /* But swap fusable operands to the right. */
+  return 0;  /* Otherwise don't swap. */
+}
+
+static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  Reg left, dest = ra_dest(as, ir, RSET_GPR);
+  uint32_t m;
+  if (asm_swapops(as, lref, rref)) {
+    IRRef tmp = lref; lref = rref; rref = tmp;
+    if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC)
+      ai ^= (ARMI_SUB^ARMI_RSB);
+  }
+  left = ra_hintalloc(as, lref, dest, RSET_GPR);
+  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+  if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
+    asm_guardcc(as, CC_VS);
+    ai |= ARMI_S;
+  }
+  emit_dn(as, ai^m, dest, left);
+}
+
+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
+    as->flagmcp = NULL;
+    as->mcp++;
+    ai |= ARMI_S;
+  }
+  asm_intop(as, ir, ai);
+}
+
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
+}
+
+static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  emit_dn(as, ai|ARMI_K12|0, dest, left);
+}
+
+/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  Reg tmp = RID_NONE;
+  /* ARMv5 restriction: dest != left and dest_hi != left. */
+  if (dest == left && left != right) { left = right; right = dest; }
+  if (irt_isguard(ir->t)) {  /* IR_MULOV */
+    if (!(as->flags & JIT_F_ARMV6) && dest == left)
+      tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left));
+    asm_guardcc(as, CC_NE);
+    emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
+    emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left);
+  } else {
+    if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP;
+    emit_nm(as, ARMI_MUL|ARMF_S(right), dest, left);
+  }
+  /* Only need this for the dest == left == right case. */
+  if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right);
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D))
+      asm_fparith(as, ir, ARMI_VADD_D);
+    return;
+  }
+#endif
+  asm_intop_s(as, ir, ARMI_ADD);
+}
+
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D))
+      asm_fparith(as, ir, ARMI_VSUB_D);
+    return;
+  }
+#endif
+  asm_intop_s(as, ir, ARMI_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, ARMI_VMUL_D);
+    return;
+  }
+#endif
+  asm_intmul(as, ir);
+}
+
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, ARMI_VNEG_D);
+    return;
+  }
+#endif
+  asm_intneg(as, ir, ARMI_RSB);
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+		RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+				   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+#endif
+
+static void asm_bitswap(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if ((as->flags & JIT_F_ARMV6)) {
+    emit_dm(as, ARMI_REV, dest, left);
+  } else {
+    Reg tmp2 = dest;
+    if (tmp2 == left)
+      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left));
+    emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP);
+    emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_ROR, 8), tmp2, left);
+    emit_dn(as, ARMI_BIC|ARMI_K12|256*8|255, RID_TMP, RID_TMP);
+    emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left);
+  }
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
+{
+  if (irref_isk(ir->op2)) {  /* Constant shifts. */
+    /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */
+    /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+    int32_t shift = (IR(ir->op2)->i & 31);
+    emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, left);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_dm(as, ARMI_MOV|ARMF_RSH(sh, right), dest, left);
+  }
+}
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
+{
+  uint32_t kcmp = 0, kmov = 0;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  Reg right = 0;
+  if (irref_isk(ir->op2)) {
+    kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i);
+    if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i);
+  }
+  if (!kmov) {
+    kcmp = 0;
+    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  }
+  if (kmov || dest != right) {
+    emit_dm(as, ARMF_CC(ARMI_MOV, cc)^kmov, dest, right);
+    cc ^= 1;  /* Must use opposite conditions for paired moves. */
+  } else {
+    cc ^= (CC_LT^CC_GT);  /* Otherwise may swap CC_LT <-> CC_GT. */
+  }
+  if (dest != left) emit_dm(as, ARMF_CC(ARMI_MOV, cc), dest, left);
+  emit_nm(as, ARMI_CMP^kcmp, left, right);
+}
+
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+  RegSet drop = RSET_SCRATCH;
+  Reg r;
+  IRRef args[4];
+  args[0] = ir->op1; args[1] = (ir+1)->op1;
+  args[2] = ir->op2; args[3] = (ir+1)->op2;
+  /* __aeabi_cdcmple preserves r0-r3. */
+  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
+  if (ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r);
+  if (!rset_test(as->freeset, RID_R2) &&
+      regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2);
+  if (!rset_test(as->freeset, RID_R3) &&
+      regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3);
+  ra_evictset(as, drop);
+  ra_destpair(as, ir);
+  emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETHI, RID_R3);
+  emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETLO, RID_R2);
+  emit_call(as, (void *)ci->func);
+  for (r = RID_R0; r <= RID_R3; r++)
+    ra_leftov(as, r, args[r-RID_R0]);
+}
+#else
+static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc)
+{
+  Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = ((left >> 8) & 15); left &= 15;
+  if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left);
+  if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right);
+  emit_d(as, ARMI_VMRS, 0);
+  emit_dm(as, ARMI_VCMP_D, left, right);
+}
+#endif
+
+static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
+{
+#if LJ_SOFTFP
+  UNUSED(fcc);
+#else
+  if (irt_isnum(ir->t))
+    asm_fpmin_max(as, ir, fcc);
+  else
+#endif
+    asm_intmin_max(as, ir, cc);
+}
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Map of comparisons to flags. ORDER IR. */
+static const uint8_t asm_compmap[IR_ABC+1] = {
+  /* op  FP swp  int cc   FP cc */
+  /* LT       */ CC_GE + (CC_HS << 4),
+  /* GE    x  */ CC_LT + (CC_HI << 4),
+  /* LE       */ CC_GT + (CC_HI << 4),
+  /* GT    x  */ CC_LE + (CC_HS << 4),
+  /* ULT   x  */ CC_HS + (CC_LS << 4),
+  /* UGE      */ CC_LO + (CC_LO << 4),
+  /* ULE   x  */ CC_HI + (CC_LO << 4),
+  /* UGT      */ CC_LS + (CC_LS << 4),
+  /* EQ       */ CC_NE + (CC_NE << 4),
+  /* NE       */ CC_EQ + (CC_EQ << 4),
+  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
+};
+
+#if LJ_SOFTFP
+/* FP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+  RegSet drop = RSET_SCRATCH;
+  Reg r;
+  IRRef args[4];
+  int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1);
+  args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1;
+  args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2;
+  /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */
+  for (r = RID_R0; r <= RID_R3; r++)
+    if (!rset_test(as->freeset, r) &&
+	regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r);
+  ra_evictset(as, drop);
+  asm_guardcc(as, (asm_compmap[ir->o] >> 4));
+  emit_call(as, (void *)ci->func);
+  for (r = RID_R0; r <= RID_R3; r++)
+    ra_leftov(as, r, args[r-RID_R0]);
+}
+#else
+/* FP comparisons. */
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+  Reg left, right;
+  ARMIns ai;
+  int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
+  if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
+    left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15);
+    right = 0;
+    ai = ARMI_VCMPZ_D;
+  } else {
+    left = ra_alloc2(as, ir, RSET_FPR);
+    if (swp) {
+      right = (left & 15); left = ((left >> 8) & 15);
+    } else {
+      right = ((left >> 8) & 15); left &= 15;
+    }
+    ai = ARMI_VCMP_D;
+  }
+  asm_guardcc(as, (asm_compmap[ir->o] >> 4));
+  emit_d(as, ARMI_VMRS, 0);
+  emit_dm(as, ai, left, right);
+}
+#endif
+
+/* Integer comparisons. */
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+  ARMCC cc = (asm_compmap[ir->o] & 15);
+  IRRef lref = ir->op1, rref = ir->op2;
+  Reg left;
+  uint32_t m;
+  int cmpprev0 = 0;
+  lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+  if (asm_swapops(as, lref, rref)) {
+    Reg tmp = lref; lref = rref; rref = tmp;
+    if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
+    else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
+  }
+  if (irref_isk(rref) && IR(rref)->i == 0) {
+    IRIns *irl = IR(lref);
+    cmpprev0 = (irl+1 == ir);
+    /* Combine comp(BAND(left, right), 0) into tst left, right. */
+    if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
+      IRRef blref = irl->op1, brref = irl->op2;
+      uint32_t m2 = 0;
+      Reg bleft;
+      if (asm_swapops(as, blref, brref)) {
+	Reg tmp = blref; blref = brref; brref = tmp;
+      }
+      if (irref_isk(brref)) {
+	m2 = emit_isk12(ARMI_AND, IR(brref)->i);
+	if ((m2 & (ARMI_AND^ARMI_BIC)))
+	  goto notst;  /* Not beneficial if we miss a constant operand. */
+      }
+      if (cc == CC_GE) cc = CC_PL;
+      else if (cc == CC_LT) cc = CC_MI;
+      else if (cc > CC_NE) goto notst;  /* Other conds don't work with tst. */
+      bleft = ra_alloc1(as, blref, RSET_GPR);
+      if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft));
+      asm_guardcc(as, cc);
+      emit_n(as, ARMI_TST^m2, bleft);
+      return;
+    }
+  }
+notst:
+  left = ra_alloc1(as, lref, RSET_GPR);
+  m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left));
+  asm_guardcc(as, cc);
+  emit_n(as, ARMI_CMP^m, left);
+  /* Signed comparison with zero and referencing previous ins? */
+  if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE))
+    as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
+}
+
+#if LJ_HASFFI
+/* 64 bit integer comparisons. */
+static void asm_int64comp(ASMState *as, IRIns *ir)
+{
+  int signedcomp = (ir->o <= IR_GT);
+  ARMCC cclo, cchi;
+  Reg leftlo, lefthi;
+  uint32_t mlo, mhi;
+  RegSet allow = RSET_GPR, oldfree;
+
+  /* Always use unsigned comparison for loword. */
+  cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15;
+  leftlo = ra_alloc1(as, ir->op1, allow);
+  oldfree = as->freeset;
+  mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo));
+  allow &= ~(oldfree & ~as->freeset);  /* Update for allocs of asm_fuseopm. */
+
+  /* Use signed or unsigned comparison for hiword. */
+  cchi = asm_compmap[ir->o] & 15;
+  lefthi = ra_alloc1(as, (ir+1)->op1, allow);
+  mhi = asm_fuseopm(as, ARMI_CMP, (ir+1)->op2, rset_clear(allow, lefthi));
+
+  /* All register allocations must be performed _before_ this point. */
+  if (signedcomp) {
+    MCLabel l_around = emit_label(as);
+    asm_guardcc(as, cclo);
+    emit_n(as, ARMI_CMP^mlo, leftlo);
+    emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around);
+    if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6;  /* GE -> GT, LE -> LT */
+    asm_guardcc(as, cchi);
+  } else {
+    asm_guardcc(as, cclo);
+    emit_n(as, ARMF_CC(ARMI_CMP, CC_EQ)^mlo, leftlo);
+  }
+  emit_n(as, ARMI_CMP^mhi, lefthi);
+}
+#endif
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_HASFFI || LJ_SOFTFP
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
+    as->curins--;  /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+    if (!irt_isint(ir->t)) {
+      asm_sfpcomp(as, ir-1);
+      return;
+    }
+#endif
+#if LJ_HASFFI
+    asm_int64comp(as, ir-1);
+#endif
+    return;
+#if LJ_SOFTFP
+  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+    as->curins--;  /* Always skip the loword min/max. */
+    if (uselo || usehi)
+      asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
+    return;
+#elif LJ_HASFFI
+  } else if ((ir-1)->o == IR_CONV) {
+    as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
+    return;
+#endif
+  } else if ((ir-1)->o == IR_XSTORE) {
+    if ((ir-1)->r != RID_SINK)
+      asm_xstore(as, ir, 4);
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+#if LJ_HASFFI
+  case IR_ADD:
+    as->curins--;
+    asm_intop(as, ir, ARMI_ADC);
+    asm_intop(as, ir-1, ARMI_ADD|ARMI_S);
+    break;
+  case IR_SUB:
+    as->curins--;
+    asm_intop(as, ir, ARMI_SBC);
+    asm_intop(as, ir-1, ARMI_SUB|ARMI_S);
+    break;
+  case IR_NEG:
+    as->curins--;
+    asm_intneg(as, ir, ARMI_RSC);
+    asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
+    break;
+#endif
+#if LJ_SOFTFP
+  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+  case IR_STRTO:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
+    break;
+#endif
+  case IR_CALLN:
+  case IR_CALLS:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+#if LJ_SOFTFP
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+#endif
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);
+#endif
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  Reg pbase;
+  uint32_t k;
+  if (irp) {
+    if (!ra_hasspill(irp->s)) {
+      pbase = irp->r;
+      lua_assert(ra_hasreg(pbase));
+    } else if (allow) {
+      pbase = rset_pickbot(allow);
+    } else {
+      pbase = RID_RET;
+      emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0);  /* Restore temp. register. */
+    }
+  } else {
+    pbase = RID_BASE;
+  }
+  emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
+  k = emit_isk12(0, (int32_t)(8*topslot));
+  lua_assert(k);
+  emit_n(as, ARMI_CMP^k, RID_TMP);
+  emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
+  emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
+	   (int32_t)offsetof(lua_State, maxstack));
+  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
+    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    if (ra_hasspill(irp->s))
+      emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
+    emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
+    if (ra_hasspill(irp->s) && !allow)
+      emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
+    emit_loadi(as, RID_TMP, (i & ~4095));
+  } else {
+    emit_getgl(as, RID_TMP, jit_L);
+  }
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1);
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+      RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+      Reg tmp;
+      lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
+      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
+		      rset_exclude(RSET_GPREVEN, RID_BASE));
+      emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
+      if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1);
+      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd);
+      emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
+#else
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
+#endif
+    } else {
+      RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+      Reg type;
+      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
+	emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
+	if (rset_test(as->freeset, src+1)) odd = RID2RSET(src+1);
+      }
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
+	type = ra_allock(as, (int32_t)(*flinks--), odd);
+#if LJ_SOFTFP
+      } else if ((sn & SNAP_SOFTFPNUM)) {
+	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
+#endif
+      } else {
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
+      }
+      emit_lso(as, ARMI_STR, type, RID_BASE, ofs+4);
+    }
+    checkmclim(as);
+  }
+  lua_assert(map + nent == flinks);
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp1, tmp2;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
+  emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+  emit_loadi(as, tmp2, as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end);
+  emit_nm(as, ARMI_CMP, RID_TMP, tmp2);
+  emit_lso(as, ARMI_LDR, tmp2, tmp1,
+	   (int32_t)offsetof(global_State, gc.threshold));
+  emit_lso(as, ARMI_LDR, RID_TMP, tmp1,
+	   (int32_t)offsetof(global_State, gc.total));
+  ra_allockreg(as, i32ptr(J2G(as->J)), tmp1);
+  as->gcsteps = 0;
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+  MCode *p = as->mctop;
+  MCode *target = as->mcp;
+  if (as->loopinv) {  /* Inverted loop branch? */
+    /* asm_guardcc already inverted the bcc and patched the final bl. */
+    p[-2] |= ((uint32_t)(target-p) & 0x00ffffffu);
+  } else {
+    p[-1] = ARMI_B | ((uint32_t)((target-p)-1) & 0x00ffffffu);
+  }
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Reload L register from g->jit_L. */
+static void asm_head_lreg(ASMState *as)
+{
+  IRIns *ir = IR(ASMREF_L);
+  if (ra_used(ir)) {
+    Reg r = ra_dest(as, ir, RSET_GPR);
+    emit_getgl(as, r, jit_L);
+    ra_evictk(as);
+  }
+}
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *ir;
+  asm_head_lreg(as);
+  ir = IR(REF_BASE);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
+  ra_destreg(as, ir, RID_BASE);
+}
+
+/* Coalesce BASE register for a side trace. */
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+  IRIns *ir;
+  asm_head_lreg(as);
+  ir = IR(REF_BASE);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
+  if (ra_hasspill(irp->s)) {
+    rset_clear(allow, ra_dest(as, ir, allow));
+  } else {
+    Reg r = irp->r;
+    lua_assert(ra_hasreg(r));
+    rset_clear(allow, r);
+    if (r != ir->r && !rset_test(as->freeset, r))
+      ra_restore(as, regcost_ref(as->cost[r]));
+    ra_destreg(as, ir, r);
+  }
+  return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *p = as->mctop;
+  MCode *target;
+  int32_t spadj = as->T->spadjust;
+  if (spadj == 0) {
+    as->mctop = --p;
+  } else {
+    /* Patch stack adjustment. */
+    uint32_t k = emit_isk12(ARMI_ADD, spadj);
+    lua_assert(k);
+    p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+  }
+  /* Patch exit branch. */
+  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
+    as->invmcp = NULL;
+  }
+  *p = 0;  /* Prevent load/store merging. */
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+      break;
+    }
+    /* fallthrough */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+#if !LJ_SOFTFP
+    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
+#endif
+    asm_intcomp(as, ir);
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
+  case IR_BSWAP: asm_bitswap(as, ir); break;
+
+  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
+  case IR_BOR:  asm_bitop(as, ir, ARMI_ORR); break;
+  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
+
+  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
+  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
+  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
+  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
+  case IR_BROL: lua_assert(0); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
+  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
+  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
+  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
+  case IR_NEG: asm_neg(as, ir); break;
+
+#if LJ_SOFTFP
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP. */
+    break;
+#else
+  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
+  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
+  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
+  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
+  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
+  case IR_FPMATH:
+    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+      break;
+    if (ir->op2 <= IRFPM_TRUNC)
+      asm_callround(as, ir, ir->op2);
+    else if (ir->op2 == IRFPM_SQRT)
+      asm_fpunary(as, ir, ARMI_VSQRT_D);
+    else
+      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
+    break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+
+  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
+  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir, 0); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++) {
+    if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
+      if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
+	if (irt_isnum(IR(args[i])->t)) {
+	  if (nfpr > 0) nfpr--;
+	  else fprodd = 0, nslots = (nslots + 3) & ~1;
+	} else {
+	  if (fprodd) fprodd--;
+	  else if (nfpr > 0) fprodd = 1, nfpr--;
+	  else nslots++;
+	}
+      } else if (irt_isnum(IR(args[i])->t)) {
+	ngpr &= ~1;
+	if (ngpr > 0) ngpr -= 2; else nslots += 2;
+      } else {
+	if (ngpr > 0) ngpr--; else nslots++;
+      }
+    } else {
+      if (ngpr > 0) ngpr--; else nslots++;
+    }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return REGSP_HINT(RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)
+{
+  /* May need extra exit for asm_stack_check on side traces. */
+  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);
+  MCode *cstart = NULL, *cend = p;
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+  MCode *px = exitstub_addr(J, exitno) - 2;
+  for (; p < pe; p++) {
+    /* Look for bl_cc exitstub, replace with b_cc target. */
+    uint32_t ins = *p;
+    if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u &&
+	((ins ^ (px-p)) & 0x00ffffffu) == 0) {
+      *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu);
+      cend = p+1;
+      if (!cstart) cstart = p;
+    }
+  }
+  lua_assert(cstart != NULL);
+  lj_mcode_sync(cstart, cend);
+  lj_mcode_patch(J, mcarea, 1);
+}
+

+ 1977 - 0
luajit.mod/luajit/src/lj_asm_mips.h

@@ -0,0 +1,1977 @@
+/*
+** MIPS IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate a register or RID_ZERO. */
+static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0)
+      return RID_ZERO;
+    r = ra_allocref(as, ref, allow);
+  } else {
+    ra_noweak(as, r);
+  }
+  return r;
+}
+
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {
+    ra_noweak(as, right);
+    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {
+    right = ra_alloc1z(as, ir->op2, allow);
+    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_alloc1z(as, ir->op1, allow);
+    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Need some spare long-range jump slots, for out-of-range branches. */
+#define MIPS_SPAREJUMP		4
+
+/* Setup spare long-range jump slots per mcarea. */
+static void asm_sparejump_setup(ASMState *as)
+{
+  MCode *mxp = as->mcbot;
+  /* Assumes sizeof(MCLink) == 8. */
+  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
+    lua_assert(MIPSI_NOP == 0);
+    memset(mxp+2, 0, MIPS_SPAREJUMP*8);
+    mxp += MIPS_SPAREJUMP*2;
+    lua_assert(mxp < as->mctop);
+    lj_mcode_sync(as->mcbot, mxp);
+    lj_mcode_commitbot(as->J, mxp);
+    as->mcbot = mxp;
+    as->mclim = as->mcbot + MCLIM_REDZONE;
+  }
+}
+
+/* Setup exit stub after the end of each trace. */
+static void asm_exitstub_setup(ASMState *as)
+{
+  MCode *mxp = as->mctop;
+  /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
+  *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
+  *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
+  lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
+  *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
+  as->mctop = mxp;
+}
+
+/* Keep this in-sync with exitstub_trace_addr(). */
+#define asm_exitstub_addr(as)	((as)->mctop)
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
+{
+  MCode *target = asm_exitstub_addr(as);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->invmcp = NULL;
+    as->loopinv = 1;
+    as->mcp = p+1;
+    mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u);  /* Invert cond. */
+    target = p;  /* Patch target later in asm_loop_fixup. */
+  }
+  emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+  emit_branch(as, mi, rs, rt, target);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref)
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;
+}
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;
+	  ofs += 8*IR(ir->op2)->i;
+	  if (checki16(ofs)) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (checki16(ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
+	int32_t jgl = (intptr_t)J2G(as->J);
+	if ((uint32_t)(ofs-jgl) < 65536) {
+	  *ofsp = ofs-jgl-32768;
+	  return RID_JGL;
+	} else {
+	  *ofsp = (int16_t)ofs;
+	  return ra_allock(as, ofs-(int16_t)ofs, allow);
+	}
+      }
+    }
+  }
+  *ofsp = 0;
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
+			 RegSet allow, int32_t ofs)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    if (ir->o == IR_ADD) {
+      int32_t ofs2;
+      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
+	ref = ir->op1;
+	ofs = ofs2;
+      }
+    } else if (ir->o == IR_STRREF) {
+      int32_t ofs2 = 65536;
+      lua_assert(ofs == 0);
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs2 = ofs + IR(ir->op2)->i;
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs2 = ofs + IR(ir->op1)->i;
+	ref = ir->op2;
+      }
+      if (!checki16(ofs2)) {
+	/* NYI: Fuse ADD with constant. */
+	Reg right, left = ra_alloc2(as, ir, allow);
+	right = (left >> 8); left &= 255;
+	emit_hsi(as, mi, rt, RID_TMP, ofs);
+	emit_dst(as, MIPSI_ADDU, RID_TMP, left, right);
+	return;
+      }
+      ofs = ofs2;
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_hsi(as, mi, rt, base, ofs);
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_NARGS(ci);
+  int32_t ofs = 16;
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+    IRRef ref = args[n];
+    if (ref) {
+      IRIns *ir = IR(ref);
+      if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
+	  !(ci->flags & CCI_VARARG)) {
+	lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
+	ra_leftov(as, fpr, ref);
+	fpr += 2;
+	gpr += irt_isnum(ir->t) ? 2 : 1;
+      } else {
+	fpr = REGARG_LASTFPR+1;
+	if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
+	if (gpr <= REGARG_LASTGPR) {
+	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
+	  if (irt_isfp(ir->t)) {
+	    RegSet of = as->freeset;
+	    Reg r;
+	    /* Workaround to protect argument GPRs from being used for remat. */
+	    as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
+	    r = ra_alloc1(as, ref, RSET_FPR);
+	    as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
+	    if (irt_isnum(ir->t)) {
+	      emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
+	      emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
+	      lua_assert(rset_test(as->freeset, gpr+1));  /* Already evicted. */
+	      gpr += 2;
+	    } else if (irt_isfloat(ir->t)) {
+	      emit_tg(as, MIPSI_MFC1, gpr, r);
+	      gpr++;
+	    }
+	  } else {
+	    ra_leftov(as, gpr, ref);
+	    gpr++;
+	  }
+	} else {
+	  Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+	  if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += irt_isnum(ir->t) ? 8 : 4;
+	}
+      }
+    } else {
+      fpr = REGARG_LASTFPR+1;
+      if (gpr <= REGARG_LASTGPR)
+	gpr++;
+      else
+	ofs += 4;
+    }
+    checkmclim(as);
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP);
+  if ((ci->flags & CCI_NOFPRCLOBBER))
+    drop &= ~RSET_FPR;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lua_assert(!irt_ispri(ir->t));
+    if (irt_isfp(ir->t)) {
+      if ((ci->flags & CCI_CASTU64)) {
+	int32_t ofs = sps_scale(ir->s);
+	Reg dest = ir->r;
+	if (ra_hasreg(dest)) {
+	  ra_free(as, dest);
+	  ra_modified(as, dest);
+	  emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
+	  emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
+	}
+	if (ofs) {
+	  emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
+	  emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
+	}
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)(irf->i);
+  } else {  /* Need specific register for indirect calls. */
+    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
+    MCode *p = as->mcp;
+    if (r == RID_CFUNCADDR)
+      *--p = MIPSI_NOP;
+    else
+      *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r);
+    *--p = MIPSI_JALR | MIPSF_S(r);
+    as->mcp = p;
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
+		RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
+  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_FPRET);
+  emit_call(as, (void *)lj_ir_callinfo[id].func);
+  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guard(as, MIPSI_BNE, RID_TMP,
+	    ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_tsi(as, MIPSI_LW, RID_TMP, base, -8);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guard(as, MIPSI_BC1F, 0, 0);
+  emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
+  emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
+  emit_tg(as, MIPSI_MFC1, dest, tmp);
+  emit_fg(as, MIPSI_CVT_W_D, tmp, left);
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_FPR;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  emit_tg(as, MIPSI_MFC1, dest, tmp);
+  emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lua_assert(irt_type(ir->t) != st);
+  lua_assert(!(irt_isint64(ir->t) ||
+	       (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S,
+	      dest, ra_alloc1(as, lref, RSET_FPR));
+    } else if (st == IRT_U32) {  /* U32 to FP conversion. */
+      /* y = (x ^ 0x8000000) + 2147483648.0 */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      emit_fgh(as, irt_isfloat(ir->t) ? MIPSI_ADD_S : MIPSI_ADD_D,
+	       dest, dest, tmp);
+      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
+	      dest, dest);
+      if (irt_isfloat(ir->t))
+	emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
+		   (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
+		   RSET_GPR);
+      else
+	emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
+		   (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
+		   RSET_GPR);
+      emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
+      emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
+      emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
+	      dest, dest);
+      emit_tg(as, MIPSI_MTC1, left, dest);
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      if (irt_isu32(ir->t)) {
+	/* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
+	emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
+	emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
+	emit_tg(as, MIPSI_MFC1, dest, tmp);
+	emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
+		tmp, tmp);
+	emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
+		 tmp, left, tmp);
+	if (st == IRT_FLOAT)
+	  emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
+		     (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
+		     RSET_GPR);
+	else
+	  emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
+		     (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
+		     RSET_GPR);
+      } else {
+	emit_tg(as, MIPSI_MFC1, dest, tmp);
+	emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
+		tmp, left);
+      }
+    }
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
+      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+      if ((ir->op2 & IRCONV_SEXT)) {
+	if ((as->flags & JIT_F_MIPS32R2)) {
+	  emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
+	} else {
+	  uint32_t shift = st == IRT_I8 ? 24 : 16;
+	  emit_dta(as, MIPSI_SRA, dest, dest, shift);
+	  emit_dta(as, MIPSI_SLL, dest, left, shift);
+	}
+      } else {
+	emit_tsi(as, MIPSI_ANDI, dest, left,
+		 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
+      }
+    } else {  /* 32/64 bit integer conversions. */
+      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
+      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    }
+  }
+}
+
+#if LJ_HASFFI
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[LJ_BE?0:1] = ir->op1;
+  args[LJ_BE?1:0] = (ir-1)->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  ci = &lj_ir_callinfo[id];
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+#endif
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  RegSet drop = RSET_SCRATCH;
+  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
+  ra_evictset(as, drop);
+  asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
+  args[0] = ir->op1;      /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n  */
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or temp slots. */
+  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1),
+	   RID_SP, sps_scale(ir->s));
+}
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
+      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+    else  /* Otherwise force a spill and use the spill slot. */
+      emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir));
+  } else {
+    /* Otherwise use g->tmptv to hold the TValue. */
+    RegSet allow = rset_exclude(RSET_GPR, dest);
+    Reg type;
+    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    if (!irt_ispri(ir->t)) {
+      Reg src = ra_alloc1(as, ref, allow);
+      emit_setgl(as, src, tmptv.gcr);
+    }
+    type = ra_allock(as, irt_toitype(ir->t), allow);
+    emit_setgl(as, type, tmptv.it);
+  }
+}
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+    args[1] = ASMREF_TMP1;  /* const lua_Number * */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+  } else {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+    args[1] = ir->op1;  /* int32_t k */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+  }
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    ofs += 8*IR(ir->op2)->i;
+    if (checki16(ofs)) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_tsi(as, MIPSI_ADDIU, dest, base, ofs);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base);
+  emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  IRType1 kt = irkey->t;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+
+  rset_clear(allow, tab);
+  if (irt_isnum(kt)) {
+    key = ra_alloc1(as, refkey, RSET_FPR);
+    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+  } else if (!irt_ispri(kt)) {
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+    type = ra_allock(as, irt_toitype(irkey->t), allow);
+    rset_clear(allow, type);
+  }
+  tmp2 = ra_scratch(as, allow);
+  rset_clear(allow, tmp2);
+
+  /* Key not found in chain: load niltv. */
+  l_end = emit_label(as);
+  if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+  else
+    *--as->mcp = MIPSI_NOP;
+  /* Follow hash chain until the end. */
+  emit_move(as, dest, tmp1);
+  l_loop = --as->mcp;
+  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (irt_isnum(kt)) {
+    emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
+    emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
+	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
+    emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
+    emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+    emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
+  } else {
+    if (irt_ispri(kt)) {
+      emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
+    } else {
+      emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
+      emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
+      emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
+    }
+  }
+  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
+  *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+
+  /* Load main position relative to tab->node into dest. */
+  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  if (khash == 0) {
+    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
+  } else {
+    Reg tmphash = tmp1;
+    if (irref_isk(refkey))
+      tmphash = ra_allock(as, khash, allow);
+    emit_dst(as, MIPSI_ADDU, dest, dest, tmp1);
+    lua_assert(sizeof(Node) == 24);
+    emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
+    emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
+    emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
+    emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
+    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
+    emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
+    if (irref_isk(refkey)) {
+      /* Nothing to do. */
+    } else if (irt_isstr(kt)) {
+      emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
+      emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
+      emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
+      emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
+      emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
+      if (irt_isnum(kt)) {
+	emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
+	if ((as->flags & JIT_F_MIPS32R2)) {
+	  emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
+	} else {
+	  emit_dst(as, MIPSI_OR, dest, dest, tmp1);
+	  emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1);
+	  emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
+	}
+	emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
+	emit_tg(as, MIPSI_MFC1, tmp2, key);
+	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
+      } else {
+	emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
+	emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
+	emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
+      }
+    }
+  }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg key = RID_NONE, type = RID_TMP, idx = node;
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  int32_t lo, hi;
+  lua_assert(ofs % sizeof(Node) == 0);
+  if (ofs > 32736) {
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_tsi(as, MIPSI_ADDIU, dest, node, ofs);
+  }
+  if (!irt_ispri(irkey->t)) {
+    key = ra_scratch(as, allow);
+    rset_clear(allow, key);
+  }
+  if (irt_isnum(irkey->t)) {
+    lo = (int32_t)ir_knum(irkey)->u32.lo;
+    hi = (int32_t)ir_knum(irkey)->u32.hi;
+  } else {
+    lo = irkey->i;
+    hi = irt_toitype(irkey->t);
+    if (!ra_hasreg(key))
+      goto nolo;
+  }
+  asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO);
+nolo:
+  asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
+  if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
+  emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
+  if (ofs > 32736)
+    emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
+}
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+    IRRef args[3];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* GCtab *t     */
+    args[2] = ASMREF_TMP1;  /* cTValue *key */
+    asm_setupresult(as, ir, ci);  /* TValue * */
+    asm_gencall(as, ci, args);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+  }
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+  /* NYI: Check that UREFO is still open and not aliasing a slot. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+      emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
+      emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    } else {
+      emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v));
+    }
+    emit_tsi(as, MIPSI_LW, uv, func,
+	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+  UNUSED(as); UNUSED(ir);
+  lua_assert(!ra_used(ir));
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  IRRef ref = ir->op2, refk = ir->op1;
+  int32_t ofs = (int32_t)sizeof(GCstr);
+  Reg r;
+  if (irref_isk(ref)) {
+    IRRef tmp = refk; refk = ref; ref = tmp;
+  } else if (!irref_isk(refk)) {
+    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+    IRIns *irr = IR(ir->op2);
+    if (ra_hasreg(irr->r)) {
+      ra_noweak(as, irr->r);
+      right = irr->r;
+    } else if (mayfuse(as, irr->op2) &&
+	       irr->o == IR_ADD && irref_isk(irr->op2) &&
+	       checki16(ofs + IR(irr->op2)->i)) {
+      ofs += IR(irr->op2)->i;
+      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
+    } else {
+      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
+    }
+    emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs);
+    emit_dst(as, MIPSI_ADDU, dest, left, right);
+    return;
+  }
+  r = ra_alloc1(as, ref, RSET_GPR);
+  ofs += IR(refk)->i;
+  if (checki16(ofs))
+    emit_tsi(as, MIPSI_ADDIU, dest, r, ofs);
+  else
+    emit_dst(as, MIPSI_ADDU, dest, r,
+	     ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static MIPSIns asm_fxloadins(IRIns *ir)
+{
+  switch (irt_type(ir->t)) {
+  case IRT_I8: return MIPSI_LB;
+  case IRT_U8: return MIPSI_LBU;
+  case IRT_I16: return MIPSI_LH;
+  case IRT_U16: return MIPSI_LHU;
+  case IRT_NUM: return MIPSI_LDC1;
+  case IRT_FLOAT: return MIPSI_LWC1;
+  default: return MIPSI_LW;
+  }
+}
+
+static MIPSIns asm_fxstoreins(IRIns *ir)
+{
+  switch (irt_type(ir->t)) {
+  case IRT_I8: case IRT_U8: return MIPSI_SB;
+  case IRT_I16: case IRT_U16: return MIPSI_SH;
+  case IRT_NUM: return MIPSI_SDC1;
+  case IRT_FLOAT: return MIPSI_SWC1;
+  default: return MIPSI_SW;
+  }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+  MIPSIns mi = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op2 == IRFL_TAB_ARRAY) {
+    ofs = asm_fuseabase(as, ir->op1);
+    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+      emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
+      return;
+    }
+  }
+  ofs = field_ofs[ir->op2];
+  lua_assert(!irt_isfp(ir->t));
+  emit_tsi(as, mi, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    MIPSIns mi = asm_fxstoreins(ir);
+    lua_assert(!irt_isfp(ir->t));
+    emit_tsi(as, mi, src, idx, ofs);
+  }
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+		 rset_exclude(RSET_GPR, src), ofs);
+  }
+}
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  IRType1 t = ir->t;
+  Reg dest = RID_NONE, type = RID_TMP, idx;
+  RegSet allow = RSET_GPR;
+  int32_t ofs = 0;
+  if (ra_used(ir)) {
+    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    rset_clear(allow, dest);
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  rset_clear(allow, idx);
+  if (irt_isnum(t)) {
+    asm_guard(as, MIPSI_BEQ, type, RID_ZERO);
+    emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM);
+    if (ra_hasreg(dest))
+      emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
+  } else {
+    asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow));
+    if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
+  }
+  emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg idx, src = RID_NONE, type = RID_NONE;
+  int32_t ofs = 0;
+  if (ir->r == RID_SINK)
+    return;
+  if (irt_isnum(ir->t)) {
+    src = ra_alloc1(as, ir->op2, RSET_FPR);
+  } else {
+    if (!irt_ispri(ir->t)) {
+      src = ra_alloc1(as, ir->op2, allow);
+      rset_clear(allow, src);
+    }
+    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+    rset_clear(allow, type);
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (irt_isnum(ir->t)) {
+    emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
+  } else {
+    if (ra_hasreg(src))
+      emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
+    emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+  IRType1 t = ir->t;
+  Reg dest = RID_NONE, type = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
+  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    rset_clear(allow, dest);
+    base = ra_alloc1(as, REF_BASE, allow);
+    rset_clear(allow, base);
+    if ((ir->op2 & IRSLOAD_CONVERT)) {
+      if (irt_isint(t)) {
+	Reg tmp = ra_scratch(as, RSET_FPR);
+	emit_tg(as, MIPSI_MFC1, dest, tmp);
+	emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+	dest = tmp;
+	t.irt = IRT_NUM;  /* Check for original type. */
+      } else {
+	Reg tmp = ra_scratch(as, RSET_GPR);
+	emit_fg(as, MIPSI_CVT_D_W, dest, dest);
+	emit_tg(as, MIPSI_MTC1, tmp, dest);
+	dest = tmp;
+	t.irt = IRT_INT;  /* Check for original type. */
+      }
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+  rset_clear(allow, base);
+dotypecheck:
+  if (irt_isnum(t)) {
+    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+      emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
+      type = RID_TMP;
+    }
+    if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
+  } else {
+    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+      Reg ktype = ra_allock(as, irt_toitype(t), allow);
+      asm_guard(as, MIPSI_BNE, RID_TMP, ktype);
+      type = RID_TMP;
+    }
+    if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
+  }
+  if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
+  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
+	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[2];
+  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  RegSet drop = RSET_SCRATCH;
+  lua_assert(sz != CTSIZE_INVALID);
+
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  as->gcsteps++;
+
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  if (ra_used(ir))
+    ra_destreg(as, ir, RID_RET);  /* GCcdata * */
+
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    int32_t ofs = sizeof(GCcdata);
+    lua_assert(sz == 4 || sz == 8);
+    if (sz == 8) {
+      ofs += 4;
+      lua_assert((ir+1)->o == IR_HIOP);
+      if (LJ_LE) ir++;
+    }
+    for (;;) {
+      Reg r = ra_alloc1z(as, ir->op2, allow);
+      emit_tsi(as, MIPSI_SW, r, RID_RET, ofs);
+      rset_clear(allow, r);
+      if (ofs == sizeof(GCcdata)) break;
+      ofs -= 4; if (LJ_BE) ir++; else ir--;
+    }
+  }
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
+  emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
+  emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
+  emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+	       ra_releasetmp(as, ASMREF_TMP1));
+}
+#else
+#define asm_cnew(as, ir)	((void)0)
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg link = RID_TMP;
+  MCLabel l_end = emit_label(as);
+  emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist));
+  emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  emit_setgl(as, tab, gc.grayagain);
+  emit_getgl(as, link, gc.grayagain);
+  emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP);  /* Clear black bit. */
+  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
+  emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+  obj = IR(ir->op1)->r;
+  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
+  emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK);
+  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  emit_tsi(as, MIPSI_LBU, tmp, obj,
+	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+  emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = (left >> 8); left &= 255;
+  emit_fgh(as, mi, dest, left, right);
+}
+
+static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+  emit_fg(as, mi, dest, left);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+      IRRef args[2];
+      args[0] = irpp->op1;
+      args[1] = irp->op2;
+      asm_setupresult(as, ir, ci);
+      asm_gencall(as, ci, args);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, MIPSI_ADD_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    if (irref_isk(ir->op2)) {
+      int32_t k = IR(ir->op2)->i;
+      if (checki16(k)) {
+	emit_tsi(as, MIPSI_ADDIU, dest, left, k);
+	return;
+      }
+    }
+    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_dst(as, MIPSI_ADDU, dest, left, right);
+  }
+}
+
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, MIPSI_SUB_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    emit_dst(as, MIPSI_SUBU, dest, left, right);
+  }
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, MIPSI_MUL_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    emit_dst(as, MIPSI_MUL, dest, left, right);
+  }
+}
+
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, MIPSI_NEG_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
+  }
+}
+
+static void asm_arithov(ASMState *as, IRIns *ir)
+{
+  Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int k = IR(ir->op2)->i;
+    if (ir->o == IR_SUBOV) k = -k;
+    if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
+      left = ra_alloc1(as, ir->op1, RSET_GPR);
+      asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
+      emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left);
+      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
+      if (dest == left) emit_move(as, RID_TMP, left);
+      return;
+    }
+  }
+  left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+						 right), dest));
+  asm_guard(as, MIPSI_BLTZ, RID_TMP, 0);
+  emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp);
+  if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
+    emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
+  } else {  /* ((dest^left) & (dest^~right)) < 0 */
+    emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest);
+    emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO);
+  }
+  emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left);
+  emit_dst(as, ir->o == IR_ADDOV ? MIPSI_ADDU : MIPSI_SUBU, dest, left, right);
+  if (dest == left || dest == right)
+    emit_move(as, RID_TMP, dest == left ? left : right);
+}
+
+static void asm_mulov(ASMState *as, IRIns *ir)
+{
+#if LJ_DUALNUM
+#error "NYI: MULOV"
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused in single-number mode. */
+#endif
+}
+
+#if LJ_HASFFI
+static void asm_add64(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    if (k == 0) {
+      emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP);
+      goto loarith;
+    } else if (checki16(k)) {
+      emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
+      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
+      goto loarith;
+    }
+  }
+  emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_dst(as, MIPSI_ADDU, dest, left, right);
+loarith:
+  ir--;
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    if (k == 0) {
+      if (dest != left)
+	emit_move(as, dest, left);
+      return;
+    } else if (checki16(k)) {
+      if (dest == left) {
+	Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left));
+	emit_move(as, dest, tmp);
+	dest = tmp;
+      }
+      emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left);
+      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
+      return;
+    }
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  if (dest == left && dest == right) {
+    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
+    emit_move(as, dest, tmp);
+    dest = tmp;
+  }
+  emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left);
+  emit_dst(as, MIPSI_ADDU, dest, left, right);
+}
+
+static void asm_sub64(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
+  emit_dst(as, MIPSI_SUBU, dest, left, right);
+  ir--;
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  if (dest == left) {
+    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
+    emit_move(as, dest, tmp);
+    dest = tmp;
+  }
+  emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest);
+  emit_dst(as, MIPSI_SUBU, dest, left, right);
+}
+
+static void asm_neg64(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
+  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
+  ir--;
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc1(as, ir->op1, RSET_GPR);
+  emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest);
+  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
+}
+#endif
+
+static void asm_bitnot(ASMState *as, IRIns *ir)
+{
+  Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
+  IRIns *irl = IR(ir->op1);
+  if (mayfuse(as, ir->op1) && irl->o == IR_BOR) {
+    left = ra_alloc2(as, irl, RSET_GPR);
+    right = (left >> 8); left &= 255;
+  } else {
+    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    right = RID_ZERO;
+  }
+  emit_dst(as, MIPSI_NOR, dest, left, right);
+}
+
+static void asm_bitswap(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if ((as->flags & JIT_F_MIPS32R2)) {
+    emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
+    emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest));
+    emit_dst(as, MIPSI_OR, dest, dest, tmp);
+    emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
+    emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00);
+    emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8);
+    emit_dta(as, MIPSI_SRL, dest, left, 8);
+    emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00);
+    emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP);
+    emit_dta(as, MIPSI_SRL, tmp, left, 24);
+    emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
+  }
+}
+
+static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    if (checku16(k)) {
+      emit_tsi(as, mik, dest, left, k);
+      return;
+    }
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_dst(as, mi, dest, left, right);
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op2)) {  /* Constant shifts. */
+    uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
+    emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift);
+  } else {
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    emit_dst(as, mi, dest, right, left);  /* Shift amount is in rs. */
+  }
+}
+
+static void asm_bitror(ASMState *as, IRIns *ir)
+{
+  if ((as->flags & JIT_F_MIPS32R2)) {
+    asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (irref_isk(ir->op2)) {  /* Constant shifts. */
+      uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
+      Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+      emit_rotr(as, dest, left, RID_TMP, shift);
+    } else {
+      Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+      right = (left >> 8); left &= 255;
+      emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
+      emit_dst(as, MIPSI_SRLV, dest, right, left);
+      emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left);
+      emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right);
+    }
+  }
+}
+
+static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+{
+  if (irt_isnum(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;
+    if (dest == left) {
+      emit_fg(as, MIPSI_MOVT_D, dest, right);
+    } else {
+      emit_fg(as, MIPSI_MOVF_D, dest, left);
+      if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
+    }
+    emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    if (dest == left) {
+      emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
+    } else {
+      emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
+      if (dest != right) emit_move(as, dest, right);
+    }
+    emit_dst(as, MIPSI_SLT, RID_TMP,
+	     ismax ? left : right, ismax ? right : left);
+  }
+}
+
+/* -- Comparisons --------------------------------------------------------- */
+
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
+  IROp op = ir->o;
+  if (irt_isnum(ir->t)) {
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;
+    asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
+    emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
+  } else {
+    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (op == IR_ABC) op = IR_UGT;
+    if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
+      MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
+			    ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
+      asm_guard(as, mi, left, 0);
+    } else {
+      if (irref_isk(ir->op2)) {
+	int32_t k = IR(ir->op2)->i;
+	if ((op&2)) k++;
+	if (checki16(k)) {
+	  asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
+	  emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI,
+		   RID_TMP, left, k);
+	  return;
+	}
+      }
+      right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+      asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
+      emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT,
+	       RID_TMP, (op&2) ? right : left, (op&2) ? left : right);
+    }
+  }
+}
+
+static void asm_compeq(ASMState *as, IRIns *ir)
+{
+  Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
+  right = (left >> 8); left &= 255;
+  if (irt_isnum(ir->t)) {
+    asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
+    emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
+  } else {
+    asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
+  }
+}
+
+#if LJ_HASFFI
+/* 64 bit integer comparisons. */
+static void asm_comp64(ASMState *as, IRIns *ir)
+{
+  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
+  IROp op = (ir-1)->o;
+  MCLabel l_end;
+  Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
+  righthi = (lefthi >> 8); lefthi &= 255;
+  leftlo = ra_alloc2(as, ir-1,
+		     rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi));
+  rightlo = (leftlo >> 8); leftlo &= 255;
+  asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
+  l_end = emit_label(as);
+  if (lefthi != righthi)
+    emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP,
+	     (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi);
+  emit_dst(as, MIPSI_SLTU, RID_TMP,
+	   (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo);
+  if (lefthi != righthi)
+    emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end);
+}
+
+static void asm_comp64eq(ASMState *as, IRIns *ir)
+{
+  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
+  emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
+  emit_dst(as, MIPSI_XOR, tmp, left, right);
+  left = ra_alloc2(as, ir-1, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  emit_dst(as, MIPSI_XOR, RID_TMP, left, right);
+}
+#endif
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_HASFFI
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
+    as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
+    return;
+  } else if ((ir-1)->o < IR_EQ) {  /* 64 bit integer comparisons. ORDER IR. */
+    as->curins--;  /* Always skip the loword comparison. */
+    asm_comp64(as, ir);
+    return;
+  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
+    as->curins--;  /* Always skip the loword comparison. */
+    asm_comp64eq(as, ir);
+    return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    as->curins--;  /* Handle both stores here. */
+    if ((ir-1)->r != RID_SINK) {
+      asm_xstore(as, ir, LJ_LE ? 4 : 0);
+      asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+    }
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_ADD: as->curins--; asm_add64(as, ir); break;
+  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
+  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+  case IR_CALLN:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
+#endif
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
+  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
+  ExitNo oldsnap = as->snapno;
+  rset_clear(allow, pbase);
+  tmp = allow ? rset_pickbot(allow) :
+		(pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
+  as->snapno = exitno;
+  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
+  as->snapno = oldsnap;
+  if (allow == RSET_EMPTY)  /* Restore temp. register. */
+    emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0);
+  else
+    ra_modified(as, tmp);
+  emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
+  emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase);
+  emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
+  if (pbase == RID_TMP)
+    emit_getgl(as, RID_TMP, jit_base);
+  emit_getgl(as, tmp, jit_L);
+  if (allow == RSET_EMPTY)  /* Spill temp. register. */
+    emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1);
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
+    } else {
+      Reg type;
+      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, allow);
+	rset_clear(allow, src);
+	emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0));
+      }
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
+	type = ra_allock(as, (int32_t)(*flinks--), allow);
+      } else {
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+      }
+      emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
+    }
+    checkmclim(as);
+  }
+  lua_assert(map + nent == flinks);
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  /* Assumes asm_snap_prep() already done. */
+  asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+  tmp = ra_releasetmp(as, ASMREF_TMP2);
+  emit_loadi(as, tmp, as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end);
+  emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp);
+  emit_getgl(as, tmp, gc.threshold);
+  emit_getgl(as, RID_TMP, gc.total);
+  as->gcsteps = 0;
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+  MCode *p = as->mctop;
+  MCode *target = as->mcp;
+  p[-1] = MIPSI_NOP;
+  if (as->loopinv) {  /* Inverted loop branch? */
+    /* asm_guard already inverted the cond branch. Only patch the target. */
+    p[-3] |= ((target-p+2) & 0x0000ffffu);
+  } else {
+    p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+  }
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (as->loopinv) as->mctop--;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (r != RID_BASE)
+      emit_move(as, r, RID_BASE);
+  }
+}
+
+/* Coalesce BASE register for a side trace. */
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (as->loopinv) as->mctop--;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (irp->r == r) {
+      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      rset_clear(allow, irp->r);
+      emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
+    } else {
+      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
+    }
+  }
+  return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  int32_t spadj = as->T->spadjust;
+  MCode *p = as->mctop-1;
+  *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
+  p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  as->mcp = as->mctop-2;  /* Leave room for branch plus nop or stack adj. */
+  as->invmcp = as->loopref ? as->mcp : NULL;
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bitnot(as, ir); break;
+  case IR_BSWAP: asm_bitswap(as, ir); break;
+
+  case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
+  case IR_BOR:  asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
+  case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
+
+  case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
+  case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
+  case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
+  case IR_BROL: lua_assert(0); break;
+  case IR_BROR: asm_bitror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
+  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
+  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
+  case IR_NEG: asm_neg(as, ir); break;
+
+  case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
+  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
+  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
+  case IR_MIN: asm_min_max(as, ir, 0); break;
+  case IR_MAX: asm_min_max(as, ir, 1); break;
+  case IR_FPMATH:
+    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+      break;
+    if (ir->op2 <= IRFPM_TRUNC)
+      asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
+    else if (ir->op2 == IRFPM_SQRT)
+      asm_fpunary(as, ir, MIPSI_SQRT_D);
+    else
+      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
+    break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_arithov(as, ir); break;
+  case IR_SUBOV: asm_arithov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir, 0); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++) {
+    if (args[i] && irt_isfp(IR(args[i])->t) &&
+	nfpr > 0 && !(ci->flags & CCI_VARARG)) {
+      nfpr--;
+      ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
+    } else if (args[i] && irt_isnum(IR(args[i])->t)) {
+      nfpr = 0;
+      ngpr = ngpr & ~1;
+      if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
+    } else {
+      nfpr = 0;
+      if (ngpr > 0) ngpr--; else nslots++;
+    }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)
+{
+  asm_sparejump_setup(as);
+  asm_exitstub_setup(as);
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);
+  MCode *px = exitstub_trace_addr(T, exitno);
+  MCode *cstart = NULL, *cstop = NULL;
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+  MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno;
+  MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+  for (p++; p < pe; p++) {
+    if (*p == exitload) {  /* Look for load of exit number. */
+      if (((p[-1] ^ (px-p)) & 0xffffu) == 0) {  /* Look for exitstub branch. */
+	ptrdiff_t delta = target - p;
+	if (((delta + 0x8000) >> 16) == 0) {  /* Patch in-range branch. */
+	patchbranch:
+	  p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
+	  *p = MIPSI_NOP;  /* Replace the load of the exit number. */
+	  cstop = p;
+	  if (!cstart) cstart = p-1;
+	} else {  /* Branch out of range. Use spare jump slot in mcarea. */
+	  int i;
+	  for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
+	    if (mcarea[i] == tjump) {
+	      delta = mcarea+i - p;
+	      goto patchbranch;
+	    } else if (mcarea[i] == MIPSI_NOP) {
+	      mcarea[i] = tjump;
+	      cstart = mcarea+i;
+	      delta = mcarea+i - p;
+	      goto patchbranch;
+	    }
+	  }
+	  /* Ignore jump slot overflow. Child trace is simply not attached. */
+	}
+      } else if (p+1 == pe) {
+	/* Patch NOP after code for inverted loop branch. Use of J is ok. */
+	lua_assert(p[1] == MIPSI_NOP);
+	p[1] = tjump;
+	*p = MIPSI_NOP;  /* Replace the load of the exit number. */
+	cstop = p+2;
+	if (!cstart) cstart = p+1;
+      }
+    }
+  }
+  if (cstart) lj_mcode_sync(cstart, cstop);
+  lj_mcode_patch(J, mcarea, 1);
+}
+

+ 2169 - 0
luajit.mod/luajit/src/lj_asm_ppc.h

@@ -0,0 +1,2169 @@
+/*
+** PPC IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {
+    ra_noweak(as, right);
+    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {
+    right = ra_allocref(as, ir->op2, allow);
+    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_allocref(as, ir->op1, allow);
+    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Setup exit stubs after the end of each trace. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
+  /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
+  for (i = nexits-1; (int32_t)i >= 0; i--)
+    *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
+  *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno;  /* Read by exit handler. */
+  mxp--;
+  *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
+  *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
+  as->mctop = mxp;
+}
+
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{
+  /* Keep this in-sync with exitstub_trace_addr(). */
+  return as->mctop + exitno + 3;
+}
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guardcc(ASMState *as, PPCCC cc)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2);
+    emit_condbranch(as, PPCI_BC, cc^4, p);
+    return;
+  }
+  emit_condbranch(as, PPCI_BC, cc, target);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref)
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;
+}
+
+/* Indicates load/store indexed is ok. */
+#define AHUREF_LSX	((int32_t)0x80000000)
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;
+	  ofs += 8*IR(ir->op2)->i;
+	  if (checki16(ofs)) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	}
+	if (*ofsp == AHUREF_LSX) {
+	  Reg base = ra_alloc1(as, ir->op1, allow);
+	  Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+	  return base | (idx << 8);
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (checki16(ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
+	int32_t jgl = (intptr_t)J2G(as->J);
+	if ((uint32_t)(ofs-jgl) < 65536) {
+	  *ofsp = ofs-jgl-32768;
+	  return RID_JGL;
+	} else {
+	  *ofsp = (int16_t)ofs;
+	  return ra_allock(as, ofs-(int16_t)ofs, allow);
+	}
+      }
+    }
+  }
+  *ofsp = 0;
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
+			 RegSet allow, int32_t ofs)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    if (ir->o == IR_ADD) {
+      int32_t ofs2;
+      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
+	ofs = ofs2;
+	ref = ir->op1;
+      } else if (ofs == 0) {
+	Reg right, left = ra_alloc2(as, ir, allow);
+	right = (left >> 8); left &= 255;
+	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
+	return;
+      }
+    } else if (ir->o == IR_STRREF) {
+      lua_assert(ofs == 0);
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs += IR(ir->op2)->i;
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs += IR(ir->op1)->i;
+	ref = ir->op2;
+      } else {
+	/* NYI: Fuse ADD with constant. */
+	Reg tmp, right, left = ra_alloc2(as, ir, allow);
+	right = (left >> 8); left &= 255;
+	tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right));
+	emit_fai(as, pi, rt, tmp, ofs);
+	emit_tab(as, PPCI_ADD, tmp, left, right);
+	return;
+      }
+      if (!checki16(ofs)) {
+	Reg left = ra_alloc1(as, ref, allow);
+	Reg right = ra_allock(as, ofs, rset_exclude(allow, left));
+	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
+	return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_fai(as, pi, rt, base, ofs);
+}
+
+/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */
+static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
+			  RegSet allow)
+{
+  IRIns *ira = IR(ref);
+  Reg right, left;
+  if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) {
+    left = ra_alloc2(as, ira, allow);
+    right = (left >> 8); left &= 255;
+  } else {
+    right = ra_alloc1(as, ref, allow);
+    left = RID_R0;
+  }
+  emit_tab(as, pi, rt, left, right);
+}
+
+/* Fuse to multiply-add/sub instruction. */
+static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+	ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+	(rref = lref, pi = pir, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_alloc1(as, rref, RSET_FPR);
+    Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add));
+    right = (left >> 8); left &= 255;
+    emit_facb(as, pi, dest, left, right, add);
+    return 1;
+  }
+  return 0;
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_NARGS(ci);
+  int32_t ofs = 8;
+  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+    IRRef ref = args[n];
+    if (ref) {
+      IRIns *ir = IR(ref);
+      if (irt_isfp(ir->t)) {
+	if (fpr <= REGARG_LASTFPR) {
+	  lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
+	  ra_leftov(as, fpr, ref);
+	  fpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_FPR);
+	  if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += irt_isnum(ir->t) ? 8 : 4;
+	}
+      } else {
+	if (gpr <= REGARG_LASTGPR) {
+	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_GPR);
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 4;
+	}
+      }
+    } else {
+      if (gpr <= REGARG_LASTGPR)
+	gpr++;
+      else
+	ofs += 4;
+    }
+    checkmclim(as);
+  }
+  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
+    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP);
+  if ((ci->flags & CCI_NOFPRCLOBBER))
+    drop &= ~RSET_FPR;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lua_assert(!irt_ispri(ir->t));
+    if (irt_isfp(ir->t)) {
+      if ((ci->flags & CCI_CASTU64)) {
+	/* Use spill slot or temp slots. */
+	int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
+	Reg dest = ir->r;
+	if (ra_hasreg(dest)) {
+	  ra_free(as, dest);
+	  ra_modified(as, dest);
+	  emit_fai(as, PPCI_LFD, dest, RID_SP, ofs);
+	}
+	emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs);
+	emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)(irf->i);
+  } else {  /* Need a non-argument register for indirect calls. */
+    RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
+    Reg freg = ra_alloc1(as, func, allow);
+    *--as->mcp = PPCI_BCTRL;
+    *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guardcc(as, CC_NE);
+  emit_ab(as, PPCI_CMPW, RID_TMP,
+	  ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  RegSet allow = RSET_FPR;
+  Reg tmp = ra_scratch(as, rset_clear(allow, left));
+  Reg fbias = ra_scratch(as, rset_clear(allow, tmp));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest));
+  asm_guardcc(as, CC_NE);
+  emit_fab(as, PPCI_FCMPU, 0, tmp, left);
+  emit_fab(as, PPCI_FSUB, tmp, tmp, fbias);
+  emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP);
+  emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO);
+  emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
+  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
+  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
+  emit_lsptr(as, PPCI_LFS, (fbias & 31),
+	     (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+	     RSET_GPR);
+  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+  emit_fb(as, PPCI_FCTIWZ, tmp, left);
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_FPR;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
+  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+  emit_fab(as, PPCI_FADD, tmp, left, right);
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lua_assert(irt_type(ir->t) != st);
+  lua_assert(!(irt_isint64(ir->t) ||
+	       (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      if (st == IRT_NUM)  /* double -> float conversion. */
+	emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR));
+      else  /* float -> double conversion is a no-op on PPC. */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    } else {  /* Integer to FP conversion. */
+      /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */
+      /* IRT_U32: Bias with 2^52, subtract 2^52. */
+      RegSet allow = RSET_GPR;
+      Reg left = ra_alloc1(as, lref, allow);
+      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
+      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      const float *kbias;
+      if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
+      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
+      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
+      kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
+      if (st == IRT_U32) kbias++;
+      emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
+		 rset_clear(allow, hibias));
+      emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
+	       RID_SP, SPOFS_TMPLO);
+      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
+      if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000);
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      if (irt_isu32(ir->t)) {
+	/* Convert both x and x-2^31 to int and merge results. */
+	Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+	emit_asb(as, PPCI_OR, dest, dest, tmpi);  /* Select with mask idiom. */
+	emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
+	emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
+	emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO);  /* tmp = (int)(x) */
+	emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000);  /* dest += 2^31 */
+	emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31);  /* mask = -(dest < 0) */
+	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+	emit_tai(as, PPCI_LWZ, dest,
+		 RID_SP, SPOFS_TMPLO);  /* dest = (int)(x-2^31) */
+	emit_fb(as, PPCI_FCTIWZ, tmp, left);
+	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+	emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
+	emit_fab(as, PPCI_FSUB, tmp, left, tmp);
+	emit_lsptr(as, PPCI_LFS, (tmp & 31),
+		   (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
+		   RSET_GPR);
+      } else {
+	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
+	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+	emit_fb(as, PPCI_FCTIWZ, tmp, left);
+      }
+    }
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
+      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+      if ((ir->op2 & IRCONV_SEXT))
+	emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
+      else
+	emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31);
+    } else {  /* 32/64 bit integer conversions. */
+      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
+      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    }
+  }
+}
+
+#if LJ_HASFFI
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = (ir-1)->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  ci = &lj_ir_callinfo[id];
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+#endif
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  int32_t ofs;
+  RegSet drop = RSET_SCRATCH;
+  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
+  ra_evictset(as, drop);
+  asm_guardcc(as, CC_EQ);
+  emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
+  args[0] = ir->op1;      /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n  */
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or temp slots. */
+  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
+  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
+}
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
+      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+    else  /* Otherwise force a spill and use the spill slot. */
+      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+  } else {
+    /* Otherwise use g->tmptv to hold the TValue. */
+    RegSet allow = rset_exclude(RSET_GPR, dest);
+    Reg type;
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    if (!irt_ispri(ir->t)) {
+      Reg src = ra_alloc1(as, ref, allow);
+      emit_setgl(as, src, tmptv.gcr);
+    }
+    type = ra_allock(as, irt_toitype(ir->t), allow);
+    emit_setgl(as, type, tmptv.it);
+  }
+}
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+    args[1] = ASMREF_TMP1;  /* const lua_Number * */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+  } else {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+    args[1] = ir->op1;  /* int32_t k */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+  }
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    ofs += 8*IR(ir->op2)->i;
+    if (checki16(ofs)) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_tai(as, PPCI_ADDI, dest, base, ofs);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_tab(as, PPCI_ADD, dest, RID_TMP, base);
+  emit_slwi(as, RID_TMP, idx, 3);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = RID_NONE, tmp1 = RID_TMP, tmp2;
+  Reg tisnum = RID_NONE, tmpnum = RID_NONE;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  IRType1 kt = irkey->t;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+
+  rset_clear(allow, tab);
+  if (irt_isnum(kt)) {
+    key = ra_alloc1(as, refkey, RSET_FPR);
+    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+    tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
+    rset_clear(allow, tisnum);
+  } else if (!irt_ispri(kt)) {
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+  }
+  tmp2 = ra_scratch(as, allow);
+  rset_clear(allow, tmp2);
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_EQ);
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end. */
+  l_loop = --as->mcp;
+  emit_ai(as, PPCI_CMPWI, dest, 0);
+  emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_EQ);
+  else
+    emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
+  if (irt_isnum(kt)) {
+    emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
+    emit_condbranch(as, PPCI_BC, CC_GE, l_next);
+    emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
+    emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n));
+  } else {
+    if (!irt_ispri(kt)) {
+      emit_ab(as, PPCI_CMPW, tmp2, key);
+      emit_condbranch(as, PPCI_BC, CC_NE, l_next);
+    }
+    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+    if (!irt_ispri(kt))
+      emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
+  }
+  emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it));
+  *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) |
+	    (((char *)as->mcp-(char *)l_loop) & 0xffffu);
+
+  /* Load main position relative to tab->node into dest. */
+  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  if (khash == 0) {
+    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
+  } else {
+    Reg tmphash = tmp1;
+    if (irref_isk(refkey))
+      tmphash = ra_allock(as, khash, allow);
+    emit_tab(as, PPCI_ADD, dest, dest, tmp1);
+    emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
+    emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
+    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
+    emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
+    if (irref_isk(refkey)) {
+      /* Nothing to do. */
+    } else if (irt_isstr(kt)) {
+      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
+      emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
+      emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
+      emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
+      emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
+      if (irt_isnum(kt)) {
+	int32_t ofs = ra_spill(as, irkey);
+	emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
+	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+	emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
+	emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
+	emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+      } else {
+	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+	emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS);
+	emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16);
+      }
+    }
+  }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg key = RID_NONE, type = RID_TMP, idx = node;
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  lua_assert(ofs % sizeof(Node) == 0);
+  if (ofs > 32736) {
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_tai(as, PPCI_ADDI, dest, node, ofs);
+  }
+  asm_guardcc(as, CC_NE);
+  if (!irt_ispri(irkey->t)) {
+    key = ra_scratch(as, allow);
+    rset_clear(allow, key);
+  }
+  rset_clear(allow, type);
+  if (irt_isnum(irkey->t)) {
+    emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo);
+    asm_guardcc(as, CC_NE);
+    emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi);
+  } else {
+    if (ra_hasreg(key)) {
+      emit_cmpi(as, key, irkey->i);  /* May use RID_TMP, i.e. type. */
+      asm_guardcc(as, CC_NE);
+    }
+    emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t));
+  }
+  if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4);
+  emit_tai(as, PPCI_LWZ, type, idx, kofs);
+  if (ofs > 32736) {
+    emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16);
+    emit_tai(as, PPCI_ADDI, dest, node, ofs);
+  }
+}
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+  /* NYI: Check that UREFO is still open and not aliasing a slot. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      asm_guardcc(as, CC_NE);
+      emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
+      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
+      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    } else {
+      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
+    }
+    emit_tai(as, PPCI_LWZ, uv, func,
+	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+  UNUSED(as); UNUSED(ir);
+  lua_assert(!ra_used(ir));
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  IRRef ref = ir->op2, refk = ir->op1;
+  int32_t ofs = (int32_t)sizeof(GCstr);
+  Reg r;
+  if (irref_isk(ref)) {
+    IRRef tmp = refk; refk = ref; ref = tmp;
+  } else if (!irref_isk(refk)) {
+    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+    IRIns *irr = IR(ir->op2);
+    if (ra_hasreg(irr->r)) {
+      ra_noweak(as, irr->r);
+      right = irr->r;
+    } else if (mayfuse(as, irr->op2) &&
+	       irr->o == IR_ADD && irref_isk(irr->op2) &&
+	       checki16(ofs + IR(irr->op2)->i)) {
+      ofs += IR(irr->op2)->i;
+      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
+    } else {
+      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
+    }
+    emit_tai(as, PPCI_ADDI, dest, dest, ofs);
+    emit_tab(as, PPCI_ADD, dest, left, right);
+    return;
+  }
+  r = ra_alloc1(as, ref, RSET_GPR);
+  ofs += IR(refk)->i;
+  if (checki16(ofs))
+    emit_tai(as, PPCI_ADDI, dest, r, ofs);
+  else
+    emit_tab(as, PPCI_ADD, dest, r,
+	     ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static PPCIns asm_fxloadins(IRIns *ir)
+{
+  switch (irt_type(ir->t)) {
+  case IRT_I8: return PPCI_LBZ;  /* Needs sign-extension. */
+  case IRT_U8: return PPCI_LBZ;
+  case IRT_I16: return PPCI_LHA;
+  case IRT_U16: return PPCI_LHZ;
+  case IRT_NUM: return PPCI_LFD;
+  case IRT_FLOAT: return PPCI_LFS;
+  default: return PPCI_LWZ;
+  }
+}
+
+static PPCIns asm_fxstoreins(IRIns *ir)
+{
+  switch (irt_type(ir->t)) {
+  case IRT_I8: case IRT_U8: return PPCI_STB;
+  case IRT_I16: case IRT_U16: return PPCI_STH;
+  case IRT_NUM: return PPCI_STFD;
+  case IRT_FLOAT: return PPCI_STFS;
+  default: return PPCI_STW;
+  }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+  PPCIns pi = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op2 == IRFL_TAB_ARRAY) {
+    ofs = asm_fuseabase(as, ir->op1);
+    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
+      return;
+    }
+  }
+  ofs = field_ofs[ir->op2];
+  lua_assert(!irt_isi8(ir->t));
+  emit_tai(as, pi, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    PPCIns pi = asm_fxstoreins(ir);
+    emit_tai(as, pi, src, idx, ofs);
+  }
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  if (irt_isi8(ir->t))
+    emit_as(as, PPCI_EXTSB, dest, dest);
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+{
+  IRIns *irb;
+  if (ir->r == RID_SINK)
+    return;
+  if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
+      ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
+    /* Fuse BSWAP with XSTORE to stwbrx. */
+    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
+    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
+  } else {
+    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+		 rset_exclude(RSET_GPR, src), ofs);
+  }
+}
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  IRType1 t = ir->t;
+  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
+  RegSet allow = RSET_GPR;
+  int32_t ofs = AHUREF_LSX;
+  if (ra_used(ir)) {
+    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+    if (!irt_isnum(t)) ofs = 0;
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    rset_clear(allow, dest);
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (irt_isnum(t)) {
+    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
+    asm_guardcc(as, CC_GE);
+    emit_ab(as, PPCI_CMPLW, type, tisnum);
+    if (ra_hasreg(dest)) {
+      if (ofs == AHUREF_LSX) {
+	tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
+						       (idx&255)), (idx>>8)));
+	emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
+      } else {
+	emit_fai(as, PPCI_LFD, dest, idx, ofs);
+      }
+    }
+  } else {
+    asm_guardcc(as, CC_NE);
+    emit_ai(as, PPCI_CMPWI, type, irt_toitype(t));
+    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4);
+  }
+  if (ofs == AHUREF_LSX) {
+    emit_tab(as, PPCI_LWZX, type, (idx&255), tmp);
+    emit_slwi(as, tmp, (idx>>8), 3);
+  } else {
+    emit_tai(as, PPCI_LWZ, type, idx, ofs);
+  }
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg idx, src = RID_NONE, type = RID_NONE;
+  int32_t ofs = AHUREF_LSX;
+  if (ir->r == RID_SINK)
+    return;
+  if (irt_isnum(ir->t)) {
+    src = ra_alloc1(as, ir->op2, RSET_FPR);
+  } else {
+    if (!irt_ispri(ir->t)) {
+      src = ra_alloc1(as, ir->op2, allow);
+      rset_clear(allow, src);
+      ofs = 0;
+    }
+    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+    rset_clear(allow, type);
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (irt_isnum(ir->t)) {
+    if (ofs == AHUREF_LSX) {
+      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
+      emit_slwi(as, RID_TMP, (idx>>8), 3);
+    } else {
+      emit_fai(as, PPCI_STFD, src, idx, ofs);
+    }
+  } else {
+    if (ra_hasreg(src))
+      emit_tai(as, PPCI_STW, src, idx, ofs+4);
+    if (ofs == AHUREF_LSX) {
+      emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP);
+      emit_slwi(as, RID_TMP, (idx>>8), 3);
+    } else {
+      emit_tai(as, PPCI_STW, type, idx, ofs);
+    }
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4);
+  IRType1 t = ir->t;
+  Reg dest = RID_NONE, type = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
+  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lua_assert(LJ_DUALNUM ||
+	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    rset_clear(allow, dest);
+    base = ra_alloc1(as, REF_BASE, allow);
+    rset_clear(allow, base);
+    if ((ir->op2 & IRSLOAD_CONVERT)) {
+      if (irt_isint(t)) {
+	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
+	dest = ra_scratch(as, RSET_FPR);
+	emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP);
+	emit_fb(as, PPCI_FCTIWZ, dest, dest);
+	t.irt = IRT_NUM;  /* Check for original type. */
+      } else {
+	Reg tmp = ra_scratch(as, allow);
+	Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp));
+	Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+	emit_fab(as, PPCI_FSUB, dest, dest, fbias);
+	emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
+	emit_lsptr(as, PPCI_LFS, (fbias & 31),
+		   (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+		   rset_clear(allow, hibias));
+	emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
+	emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
+	emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000);
+	dest = tmp;
+	t.irt = IRT_INT;  /* Check for original type. */
+      }
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+  rset_clear(allow, base);
+dotypecheck:
+  if (irt_isnum(t)) {
+    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
+      asm_guardcc(as, CC_GE);
+      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+      type = RID_TMP;
+    }
+    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+  } else {
+    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+      asm_guardcc(as, CC_NE);
+      emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
+      type = RID_TMP;
+    }
+    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
+  }
+  if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4);
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
+  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
+	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[2];
+  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  RegSet drop = RSET_SCRATCH;
+  lua_assert(sz != CTSIZE_INVALID);
+
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  as->gcsteps++;
+
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  if (ra_used(ir))
+    ra_destreg(as, ir, RID_RET);  /* GCcdata * */
+
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    int32_t ofs = sizeof(GCcdata);
+    lua_assert(sz == 4 || sz == 8);
+    if (sz == 8) {
+      ofs += 4;
+      lua_assert((ir+1)->o == IR_HIOP);
+    }
+    for (;;) {
+      Reg r = ra_alloc1(as, ir->op2, allow);
+      emit_tai(as, PPCI_STW, r, RID_RET, ofs);
+      rset_clear(allow, r);
+      if (ofs == sizeof(GCcdata)) break;
+      ofs -= 4; ir++;
+    }
+  }
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
+  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
+  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
+  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+	       ra_releasetmp(as, ASMREF_TMP1));
+}
+#else
+#define asm_cnew(as, ir)	((void)0)
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg link = RID_TMP;
+  MCLabel l_end = emit_label(as);
+  emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
+  emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  emit_setgl(as, tab, gc.grayagain);
+  lua_assert(LJ_GC_BLACK == 0x04);
+  emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28);  /* Clear black bit. */
+  emit_getgl(as, link, gc.grayagain);
+  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK);
+  emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+  obj = IR(ir->op1)->r;
+  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
+  emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK);
+  emit_condbranch(as, PPCI_BC, CC_EQ, l_end);
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  emit_tai(as, PPCI_LBZ, tmp, obj,
+	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+  emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = (left >> 8); left &= 255;
+  if (pi == PPCI_FMUL)
+    emit_fac(as, pi, dest, left, right);
+  else
+    emit_fab(as, pi, dest, left, right);
+}
+
+static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+  emit_fb(as, pi, dest, left);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+      IRRef args[2];
+      args[0] = irpp->op1;
+      args[1] = irp->op2;
+      asm_setupresult(as, ir, ci);
+      asm_gencall(as, ci, args);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
+      asm_fparith(as, ir, PPCI_FADD);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    PPCIns pi;
+    if (irref_isk(ir->op2)) {
+      int32_t k = IR(ir->op2)->i;
+      if (checki16(k)) {
+	pi = PPCI_ADDI;
+	/* May fail due to spills/restores above, but simplifies the logic. */
+	if (as->flagmcp == as->mcp) {
+	  as->flagmcp = NULL;
+	  as->mcp++;
+	  pi = PPCI_ADDICDOT;
+	}
+	emit_tai(as, pi, dest, left, k);
+	return;
+      } else if ((k & 0xffff) == 0) {
+	emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16));
+	return;
+      } else if (!as->sectref) {
+	emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16);
+	emit_tai(as, PPCI_ADDI, dest, left, k);
+	return;
+      }
+    }
+    pi = PPCI_ADD;
+    /* May fail due to spills/restores above, but simplifies the logic. */
+    if (as->flagmcp == as->mcp) {
+      as->flagmcp = NULL;
+      as->mcp++;
+      pi |= PPCF_DOT;
+    }
+    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_tab(as, pi, dest, left, right);
+  }
+}
+
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
+      asm_fparith(as, ir, PPCI_FSUB);
+  } else {
+    PPCIns pi = PPCI_SUBF;
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left, right;
+    if (irref_isk(ir->op1)) {
+      int32_t k = IR(ir->op1)->i;
+      if (checki16(k)) {
+	right = ra_alloc1(as, ir->op2, RSET_GPR);
+	emit_tai(as, PPCI_SUBFIC, dest, right, k);
+	return;
+      }
+    }
+    /* May fail due to spills/restores above, but simplifies the logic. */
+    if (as->flagmcp == as->mcp) {
+      as->flagmcp = NULL;
+      as->mcp++;
+      pi |= PPCF_DOT;
+    }
+    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
+  }
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, PPCI_FMUL);
+  } else {
+    PPCIns pi = PPCI_MULLW;
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    if (irref_isk(ir->op2)) {
+      int32_t k = IR(ir->op2)->i;
+      if (checki16(k)) {
+	emit_tai(as, PPCI_MULLI, dest, left, k);
+	return;
+      }
+    }
+    /* May fail due to spills/restores above, but simplifies the logic. */
+    if (as->flagmcp == as->mcp) {
+      as->flagmcp = NULL;
+      as->mcp++;
+      pi |= PPCF_DOT;
+    }
+    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_tab(as, pi, dest, left, right);
+  }
+}
+
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, PPCI_FNEG);
+  } else {
+    Reg dest, left;
+    PPCIns pi = PPCI_NEG;
+    if (as->flagmcp == as->mcp) {
+      as->flagmcp = NULL;
+      as->mcp++;
+      pi |= PPCF_DOT;
+    }
+    dest = ra_dest(as, ir, RSET_GPR);
+    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    emit_tab(as, pi, dest, left, 0);
+  }
+}
+
+static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
+{
+  Reg dest, left, right;
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+  }
+  asm_guardcc(as, CC_SO);
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; }
+  emit_tab(as, pi|PPCF_DOT, dest, left, right);
+}
+
+#if LJ_HASFFI
+static void asm_add64(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+  PPCIns pi = PPCI_ADDE;
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    if (k == 0)
+      pi = PPCI_ADDZE;
+    else if (k == -1)
+      pi = PPCI_ADDME;
+    else
+      goto needright;
+    right = 0;
+  } else {
+  needright:
+    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  }
+  emit_tab(as, pi, dest, left, right);
+  ir--;
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    if (checki16(k)) {
+      emit_tai(as, PPCI_ADDIC, dest, left, k);
+      return;
+    }
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_tab(as, PPCI_ADDC, dest, left, right);
+}
+
+static void asm_sub64(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR);
+  PPCIns pi = PPCI_SUBFE;
+  if (irref_isk(ir->op1)) {
+    int32_t k = IR(ir->op1)->i;
+    if (k == 0)
+      pi = PPCI_SUBFZE;
+    else if (k == -1)
+      pi = PPCI_SUBFME;
+    else
+      goto needleft;
+    left = 0;
+  } else {
+  needleft:
+    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
+  }
+  emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
+  ir--;
+  dest = ra_dest(as, ir, RSET_GPR);
+  right = ra_alloc1(as, ir->op2, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    int32_t k = IR(ir->op1)->i;
+    if (checki16(k)) {
+      emit_tai(as, PPCI_SUBFIC, dest, right, k);
+      return;
+    }
+  }
+  left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
+  emit_tab(as, PPCI_SUBFC, dest, right, left);
+}
+
+static void asm_neg64(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  emit_tab(as, PPCI_SUBFZE, dest, left, 0);
+  ir--;
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc1(as, ir->op1, RSET_GPR);
+  emit_tai(as, PPCI_SUBFIC, dest, left, 0);
+}
+#endif
+
+static void asm_bitnot(ASMState *as, IRIns *ir)
+{
+  Reg dest, left, right;
+  PPCIns pi = PPCI_NOR;
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  dest = ra_dest(as, ir, RSET_GPR);
+  if (mayfuse(as, ir->op1)) {
+    IRIns *irl = IR(ir->op1);
+    if (irl->o == IR_BAND)
+      pi ^= (PPCI_NOR ^ PPCI_NAND);
+    else if (irl->o == IR_BXOR)
+      pi ^= (PPCI_NOR ^ PPCI_EQV);
+    else if (irl->o != IR_BOR)
+      goto nofuse;
+    left = ra_hintalloc(as, irl->op1, dest, RSET_GPR);
+    right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left));
+  } else {
+nofuse:
+    left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  }
+  emit_asb(as, pi, dest, left, right);
+}
+
+static void asm_bitswap(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  IRIns *irx;
+  if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
+      ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
+    /* Fuse BSWAP with XLOAD to lwbrx. */
+    asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
+  } else {
+    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+    Reg tmp = dest;
+    if (tmp == left) {
+      tmp = RID_TMP;
+      emit_mr(as, dest, RID_TMP);
+    }
+    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
+    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
+    emit_rotlwi(as, tmp, left, 8);
+  }
+}
+
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+      if (!checku16(k)) {
+	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+	if ((k & 0xffff) == 0) return;
+      }
+      emit_asi(as, pik, dest, left, k);
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, pi, dest, left, right);
+}
+
+/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
+static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
+{
+  IRIns *ir;
+  Reg left;
+  if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
+      irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) {
+    int32_t sh = (IR(ir->op2)->i & 31);
+    switch (ir->o) {
+    case IR_BSHL:
+      if ((mask & ((1u<<sh)-1))) goto nofuse;
+      break;
+    case IR_BSHR:
+      if ((mask & ~((~0u)>>sh))) goto nofuse;
+      sh = ((32-sh)&31);
+      break;
+    case IR_BROL:
+      break;
+    default:
+      goto nofuse;
+    }
+    left = ra_alloc1(as, ir->op1, RSET_GPR);
+    *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
+    return;
+  }
+nofuse:
+  left = ra_alloc1(as, ref, RSET_GPR);
+  *--as->mcp = pi | PPCF_T(left);
+}
+
+static void asm_bitand(ASMState *as, IRIns *ir)
+{
+  Reg dest, left, right;
+  IRRef lref = ir->op1;
+  PPCIns dot = 0;
+  IRRef op2;
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    dot = PPCF_DOT;
+  }
+  dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    if (k) {
+      /* First check for a contiguous bitmask as used by rlwinm. */
+      uint32_t s1 = lj_ffs((uint32_t)k);
+      uint32_t k1 = ((uint32_t)k >> s1);
+      if ((k1 & (k1+1)) == 0) {
+	asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
+			  PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
+			  k, lref);
+	return;
+      }
+      if (~(uint32_t)k) {
+	uint32_t s2 = lj_ffs(~(uint32_t)k);
+	uint32_t k2 = (~(uint32_t)k >> s2);
+	if ((k2 & (k2+1)) == 0) {
+	  asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
+			    PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
+			    k, lref);
+	  return;
+	}
+      }
+    }
+    if (checku16(k)) {
+      left = ra_alloc1(as, lref, RSET_GPR);
+      emit_asi(as, PPCI_ANDIDOT, dest, left, k);
+      return;
+    } else if ((k & 0xffff) == 0) {
+      left = ra_alloc1(as, lref, RSET_GPR);
+      emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
+      return;
+    }
+  }
+  op2 = ir->op2;
+  if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
+    dot ^= (PPCI_AND ^ PPCI_ANDC);
+    op2 = IR(op2)->op1;
+  }
+  left = ra_hintalloc(as, lref, dest, RSET_GPR);
+  right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, PPCI_AND ^ dot, dest, left, right);
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest, left;
+  Reg dot = 0;
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    dot = PPCF_DOT;
+  }
+  dest = ra_dest(as, ir, RSET_GPR);
+  left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if (irref_isk(ir->op2)) {  /* Constant shifts. */
+    int32_t shift = (IR(ir->op2)->i & 31);
+    if (pik == 0)  /* SLWI */
+      emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift);
+    else if (pik == 1)  /* SRWI */
+      emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31);
+    else
+      emit_asb(as, pik|dot, dest, left, shift);
+  } else {
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_asb(as, pi|dot, dest, left, right);
+  }
+}
+
+static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+{
+  if (irt_isnum(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg tmp = dest;
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;
+    if (tmp == left || tmp == right)
+      tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
+					dest), left), right));
+    emit_facb(as, PPCI_FSEL, dest, tmp,
+	      ismax ? left : right, ismax ? right : left);
+    emit_fab(as, PPCI_FSUB, tmp, left, right);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg tmp1 = RID_TMP, tmp2 = dest;
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    if (tmp2 == left || tmp2 == right)
+      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR,
+					 dest), left), right));
+    emit_tab(as, PPCI_ADD, dest, tmp2, right);
+    emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1);
+    emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1);
+    emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1);
+    emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000);
+    emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000);
+  }
+}
+
+/* -- Comparisons --------------------------------------------------------- */
+
+#define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
+#define CC_TWO		0x80	/* Check two flags for FP comparison. */
+
+/* Map of comparisons to flags. ORDER IR. */
+static const uint8_t asm_compmap[IR_ABC+1] = {
+  /* op     int cc                 FP cc */
+  /* LT  */ CC_GE               + (CC_GE<<4),
+  /* GE  */ CC_LT               + (CC_LE<<4) + CC_TWO,
+  /* LE  */ CC_GT               + (CC_GE<<4) + CC_TWO,
+  /* GT  */ CC_LE               + (CC_LE<<4),
+  /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO,
+  /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4),
+  /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4),
+  /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO,
+  /* EQ  */ CC_NE               + (CC_NE<<4),
+  /* NE  */ CC_EQ               + (CC_EQ<<4),
+  /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO  /* Same as UGT. */
+};
+
+static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
+{
+  Reg right, left = ra_alloc1(as, lref, RSET_GPR);
+  if (irref_isk(rref)) {
+    int32_t k = IR(rref)->i;
+    if ((cc & CC_UNSIGNED) == 0) {  /* Signed comparison with constant. */
+      if (checki16(k)) {
+	emit_tai(as, PPCI_CMPWI, cr, left, k);
+	/* Signed comparison with zero and referencing previous ins? */
+	if (k == 0 && lref == as->curins-1)
+	  as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
+	return;
+      } else if ((cc & 3) == (CC_EQ & 3)) {  /* Use CMPLWI for EQ or NE. */
+	if (checku16(k)) {
+	  emit_tai(as, PPCI_CMPLWI, cr, left, k);
+	  return;
+	} else if (!as->sectref && ra_noreg(IR(rref)->r)) {
+	  emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k);
+	  emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16));
+	  return;
+	}
+      }
+    } else {  /* Unsigned comparison with constant. */
+      if (checku16(k)) {
+	emit_tai(as, PPCI_CMPLWI, cr, left, k);
+	return;
+      }
+    }
+  }
+  right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
+  emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right);
+}
+
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+  PPCCC cc = asm_compmap[ir->o];
+  if (irt_isnum(ir->t)) {
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;
+    asm_guardcc(as, (cc >> 4));
+    if ((cc & CC_TWO))
+      emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3));
+    emit_fab(as, PPCI_FCMPU, 0, left, right);
+  } else {
+    IRRef lref = ir->op1, rref = ir->op2;
+    if (irref_isk(lref) && !irref_isk(rref)) {
+      /* Swap constants to the right (only for ABC). */
+      IRRef tmp = lref; lref = rref; rref = tmp;
+      if ((cc & 2) == 0) cc ^= 1;  /* LT <-> GT, LE <-> GE */
+    }
+    asm_guardcc(as, cc);
+    asm_intcomp_(as, lref, rref, 0, cc);
+  }
+}
+
+#if LJ_HASFFI
+/* 64 bit integer comparisons. */
+static void asm_comp64(ASMState *as, IRIns *ir)
+{
+  PPCCC cc = asm_compmap[(ir-1)->o];
+  if ((cc&3) == (CC_EQ&3)) {
+    asm_guardcc(as, cc);
+    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR,
+	     (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3));
+  } else {
+    asm_guardcc(as, CC_EQ);
+    emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1));
+    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC,
+	     (CC_EQ&3), (CC_EQ&3), 4+(cc&3));
+  }
+  /* Loword comparison sets cr1 and is unsigned, except for equality. */
+  asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4,
+	       cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED));
+  /* Hiword comparison sets cr0. */
+  asm_intcomp_(as, ir->op1, ir->op2, 0, cc);
+  as->flagmcp = NULL;  /* Doesn't work here. */
+}
+#endif
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_HASFFI
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
+    as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
+    return;
+  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
+    as->curins--;  /* Always skip the loword comparison. */
+    asm_comp64(as, ir);
+    return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    as->curins--;  /* Handle both stores here. */
+    if ((ir-1)->r != RID_SINK) {
+      asm_xstore(as, ir, 0);
+      asm_xstore(as, ir-1, 4);
+    }
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_ADD: as->curins--; asm_add64(as, ir); break;
+  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
+  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+  case IR_CALLN:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
+#endif
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
+  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
+  rset_clear(allow, pbase);
+  tmp = allow ? rset_pickbot(allow) :
+		(pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
+  emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno));
+  if (allow == RSET_EMPTY)  /* Restore temp. register. */
+    emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW);
+  else
+    ra_modified(as, tmp);
+  emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot));
+  emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp);
+  emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
+  if (pbase == RID_TMP)
+    emit_getgl(as, RID_TMP, jit_base);
+  emit_getgl(as, tmp, jit_L);
+  if (allow == RSET_EMPTY)  /* Spill temp. register. */
+    emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1);
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
+    } else {
+      Reg type;
+      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, allow);
+	rset_clear(allow, src);
+	emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4);
+      }
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
+	type = ra_allock(as, (int32_t)(*flinks--), allow);
+      } else {
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+      }
+      emit_tai(as, PPCI_STW, type, RID_BASE, ofs);
+    }
+    checkmclim(as);
+  }
+  lua_assert(map + nent == flinks);
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
+  emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+  tmp = ra_releasetmp(as, ASMREF_TMP2);
+  emit_loadi(as, tmp, as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end);
+  emit_ab(as, PPCI_CMPLW, RID_TMP, tmp);
+  emit_getgl(as, tmp, gc.threshold);
+  emit_getgl(as, RID_TMP, gc.total);
+  as->gcsteps = 0;
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+  MCode *p = as->mctop;
+  MCode *target = as->mcp;
+  if (as->loopinv) {  /* Inverted loop branch? */
+    /* asm_guardcc already inverted the cond branch and patched the final b. */
+    p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2);
+  } else {
+    p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
+  }
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (r != RID_BASE)
+      emit_mr(as, r, RID_BASE);
+  }
+}
+
+/* Coalesce BASE register for a side trace. */
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (irp->r == r) {
+      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      rset_clear(allow, irp->r);
+      emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
+    } else {
+      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
+    }
+  }
+  return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *p = as->mctop;
+  MCode *target;
+  int32_t spadj = as->T->spadjust;
+  if (spadj == 0) {
+    *--p = PPCI_NOP;
+    *--p = PPCI_NOP;
+    as->mctop = p;
+  } else {
+    /* Patch stack adjustment. */
+    lua_assert(checki16(CFRAME_SIZE+spadj));
+    p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
+    p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
+  }
+  /* Patch exit branch. */
+  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    as->mcp = p-2;  /* Leave room for stack pointer adjustment. */
+    as->invmcp = NULL;
+  }
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+      break;
+    }
+    /* fallthrough */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bitnot(as, ir); break;
+  case IR_BSWAP: asm_bitswap(as, ir); break;
+
+  case IR_BAND: asm_bitand(as, ir); break;
+  case IR_BOR:  asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
+  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
+
+  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
+  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
+  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
+  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
+			     PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
+  case IR_BROR: lua_assert(0); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
+  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
+  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
+  case IR_NEG: asm_neg(as, ir); break;
+
+  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
+  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
+  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
+  case IR_MIN: asm_min_max(as, ir, 0); break;
+  case IR_MAX: asm_min_max(as, ir, 1); break;
+  case IR_FPMATH:
+    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+      break;
+    if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+      asm_fpunary(as, ir, PPCI_FSQRT);
+    else
+      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
+    break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
+  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
+  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir, 0); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++)
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
+    } else {
+      if (ngpr > 0) ngpr--; else nslots++;
+    }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)
+{
+  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);
+  MCode *px = exitstub_trace_addr(T, exitno);
+  MCode *cstart = NULL;
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+  int clearso = 0;
+  for (; p < pe; p++) {
+    /* Look for exitstub branch, try to replace with branch to target. */
+    uint32_t ins = *p;
+    if ((ins & 0xfc000000u) == 0x40000000u &&
+	((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) {
+      ptrdiff_t delta = (char *)target - (char *)p;
+      if (((ins >> 16) & 3) == (CC_SO&3)) {
+	clearso = sizeof(MCode);
+	delta -= sizeof(MCode);
+      }
+      /* Many, but not all short-range branches can be patched directly. */
+      if (((delta + 0x8000) >> 16) == 0) {
+	*p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
+	     ((delta & 0x8000) * (PPCF_Y/0x8000));
+	if (!cstart) cstart = p;
+      }
+    } else if ((ins & 0xfc000000u) == PPCI_B &&
+	       ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
+      ptrdiff_t delta = (char *)target - (char *)p;
+      lua_assert(((delta + 0x02000000) >> 26) == 0);
+      *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
+      if (!cstart) cstart = p;
+    }
+  }
+  {  /* Always patch long-range branch in exit stub itself. */
+    ptrdiff_t delta = (char *)target - (char *)px - clearso;
+    lua_assert(((delta + 0x02000000) >> 26) == 0);
+    *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
+  }
+  if (!cstart) cstart = px;
+  lj_mcode_sync(cstart, px+1);
+  if (clearso) {  /* Extend the current trace. Ugly workaround. */
+    MCode *pp = J->cur.mcode;
+    J->cur.szmcode += sizeof(MCode);
+    *--pp = PPCI_MCRXR;  /* Clear SO flag. */
+    J->cur.mcode = pp;
+    lj_mcode_sync(pp, pp+1);
+  }
+  lj_mcode_patch(J, mcarea, 1);
+}
+

+ 2806 - 0
luajit.mod/luajit/src/lj_asm_x86.h

@@ -0,0 +1,2806 @@
+/*
+** x86/x64 IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Generate an exit stub group at the bottom of the reserved MCode memory. */
+static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
+{
+  ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
+  MCode *mxp = as->mcbot;
+  MCode *mxpstart = mxp;
+  if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
+    asm_mclimit(as);
+  /* Push low byte of exitno for each exit stub. */
+  *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
+  for (i = 1; i < EXITSTUBS_PER_GROUP; i++) {
+    *mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2);
+    *mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i);
+  }
+  /* Push the high byte of the exitno for each exit stub group. */
+  *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
+  /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
+  *mxp++ = XI_MOVmi;
+  *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
+  *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+  *mxp++ = 2*sizeof(void *);
+  *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
+  /* Jump to exit handler which fills in the ExitState. */
+  *mxp++ = XI_JMP; mxp += 4;
+  *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
+  /* Commit the code for this group (even if assembly fails later on). */
+  lj_mcode_commitbot(as->J, mxp);
+  as->mcbot = mxp;
+  as->mclim = as->mcbot + MCLIM_REDZONE;
+  return mxpstart;
+}
+
+/* Setup all needed exit stubs. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
+    lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+  for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
+    if (as->J->exitstubgroup[i] == NULL)
+      as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
+}
+
+/* Emit conditional branch to exit for guard.
+** It's important to emit this *after* all registers have been allocated,
+** because rematerializations may invalidate the flags.
+*/
+static void asm_guardcc(ASMState *as, int cc)
+{
+  MCode *target = exitstub_addr(as->J, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    *(int32_t *)(p+1) = jmprel(p+5, target);
+    target = p;
+    cc ^= 1;
+    if (as->realign) {
+      emit_sjcc(as, cc, target);
+      return;
+    }
+  }
+  emit_jcc(as, cc, target);
+}
+
+/* -- Memory operand fusion ----------------------------------------------- */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+/* Check if a reference is a signed 32 bit constant. */
+static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
+{
+  if (irref_isk(ref)) {
+    IRIns *ir = IR(ref);
+    if (ir->o != IR_KINT64) {
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_kint64(ir)->u64)) {
+      *k = (int32_t)ir_kint64(ir)->u64;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* Check if there's no conflicting instruction between curins and ref.
+** Also avoid fusing loads if there are multiple references.
+*/
+static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref) {
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+    else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
+      return 0;
+  }
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse array base into memory operand. */
+static IRRef asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *irb = IR(ref);
+  as->mrm.ofs = 0;
+  if (irb->o == IR_FLOAD) {
+    IRIns *ira = IR(irb->op1);
+    lua_assert(irb->op2 == IRFL_TAB_ARRAY);
+    /* We can avoid the FLOAD of t->array for colocated arrays. */
+    if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
+	!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
+      as->mrm.ofs = (int32_t)sizeof(GCtab);  /* Ofs to colocated array. */
+      return irb->op1;  /* Table obj. */
+    }
+  } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
+    /* Fuse base offset (vararg load). */
+    as->mrm.ofs = IR(irb->op2)->i;
+    return irb->op1;
+  }
+  return ref;  /* Otherwise use the given array base. */
+}
+
+/* Fuse array reference into memory operand. */
+static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irx;
+  lua_assert(ir->o == IR_AREF);
+  as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
+  irx = IR(ir->op2);
+  if (irref_isk(ir->op2)) {
+    as->mrm.ofs += 8*irx->i;
+    as->mrm.idx = RID_NONE;
+  } else {
+    rset_clear(allow, as->mrm.base);
+    as->mrm.scale = XM_SCALE8;
+    /* Fuse a constant ADD (e.g. t[i+1]) into the offset.
+    ** Doesn't help much without ABCelim, but reduces register pressure.
+    */
+    if (!LJ_64 &&  /* Has bad effects with negative index on x64. */
+	mayfuse(as, ir->op2) && ra_noreg(irx->r) &&
+	irx->o == IR_ADD && irref_isk(irx->op2)) {
+      as->mrm.ofs += 8*IR(irx->op2)->i;
+      as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow);
+    } else {
+      as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow);
+    }
+  }
+}
+
+/* Fuse array/hash/upvalue reference into memory operand.
+** Caveat: this may allocate GPRs for the base/idx registers. Be sure to
+** pass the final allow mask, excluding any GPRs used for other inputs.
+** In particular: 2-operand GPR instructions need to call ra_dest() first!
+*/
+static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    switch ((IROp)ir->o) {
+    case IR_AREF:
+      if (mayfuse(as, ref)) {
+	asm_fusearef(as, ir, allow);
+	return;
+      }
+      break;
+    case IR_HREFK:
+      if (mayfuse(as, ref)) {
+	as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+	as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	as->mrm.idx = RID_NONE;
+	return;
+      }
+      break;
+    case IR_UREFC:
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+	as->mrm.ofs = ptr2addr(&uv->tv);
+	as->mrm.base = as->mrm.idx = RID_NONE;
+	return;
+      }
+      break;
+    default:
+      lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
+		 ir->o == IR_KKPTR);
+      break;
+    }
+  }
+  as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
+  as->mrm.ofs = 0;
+  as->mrm.idx = RID_NONE;
+}
+
+/* Fuse FLOAD/FREF reference into memory operand. */
+static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
+{
+  lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
+  as->mrm.ofs = field_ofs[ir->op2];
+  as->mrm.idx = RID_NONE;
+  if (irref_isk(ir->op1)) {
+    as->mrm.ofs += IR(ir->op1)->i;
+    as->mrm.base = RID_NONE;
+  } else {
+    as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+  }
+}
+
+/* Fuse string reference into memory operand. */
+static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irr;
+  lua_assert(ir->o == IR_STRREF);
+  as->mrm.base = as->mrm.idx = RID_NONE;
+  as->mrm.scale = XM_SCALE1;
+  as->mrm.ofs = sizeof(GCstr);
+  if (irref_isk(ir->op1)) {
+    as->mrm.ofs += IR(ir->op1)->i;
+  } else {
+    Reg r = ra_alloc1(as, ir->op1, allow);
+    rset_clear(allow, r);
+    as->mrm.base = (uint8_t)r;
+  }
+  irr = IR(ir->op2);
+  if (irref_isk(ir->op2)) {
+    as->mrm.ofs += irr->i;
+  } else {
+    Reg r;
+    /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
+    if (!LJ_64 &&  /* Has bad effects with negative index on x64. */
+	mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
+      as->mrm.ofs += IR(irr->op2)->i;
+      r = ra_alloc1(as, irr->op1, allow);
+    } else {
+      r = ra_alloc1(as, ir->op2, allow);
+    }
+    if (as->mrm.base == RID_NONE)
+      as->mrm.base = (uint8_t)r;
+    else
+      as->mrm.idx = (uint8_t)r;
+  }
+}
+
+static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  as->mrm.idx = RID_NONE;
+  if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    as->mrm.ofs = ir->i;
+    as->mrm.base = RID_NONE;
+  } else if (ir->o == IR_STRREF) {
+    asm_fusestrref(as, ir, allow);
+  } else {
+    as->mrm.ofs = 0;
+    if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
+      /* Gather (base+idx*sz)+ofs as emitted by cdata ptr/array indexing. */
+      IRIns *irx;
+      IRRef idx;
+      Reg r;
+      if (asm_isk32(as, ir->op2, &as->mrm.ofs)) {  /* Recognize x+ofs. */
+	ref = ir->op1;
+	ir = IR(ref);
+	if (!(ir->o == IR_ADD && canfuse(as, ir) && ra_noreg(ir->r)))
+	  goto noadd;
+      }
+      as->mrm.scale = XM_SCALE1;
+      idx = ir->op1;
+      ref = ir->op2;
+      irx = IR(idx);
+      if (!(irx->o == IR_BSHL || irx->o == IR_ADD)) {  /* Try other operand. */
+	idx = ir->op2;
+	ref = ir->op1;
+	irx = IR(idx);
+      }
+      if (canfuse(as, irx) && ra_noreg(irx->r)) {
+	if (irx->o == IR_BSHL && irref_isk(irx->op2) && IR(irx->op2)->i <= 3) {
+	  /* Recognize idx<<b with b = 0-3, corresponding to sz = (1),2,4,8. */
+	  idx = irx->op1;
+	  as->mrm.scale = (uint8_t)(IR(irx->op2)->i << 6);
+	} else if (irx->o == IR_ADD && irx->op1 == irx->op2) {
+	  /* FOLD does idx*2 ==> idx<<1 ==> idx+idx. */
+	  idx = irx->op1;
+	  as->mrm.scale = XM_SCALE2;
+	}
+      }
+      r = ra_alloc1(as, idx, allow);
+      rset_clear(allow, r);
+      as->mrm.idx = (uint8_t)r;
+    }
+  noadd:
+    as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
+  }
+}
+
+/* Fuse load into memory operand. */
+static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_hasreg(ir->r)) {
+    if (allow != RSET_EMPTY) {  /* Fast path. */
+      ra_noweak(as, ir->r);
+      return ir->r;
+    }
+  fusespill:
+    /* Force a spill if only memory operands are allowed (asm_x87load). */
+    as->mrm.base = RID_ESP;
+    as->mrm.ofs = ra_spill(as, ir);
+    as->mrm.idx = RID_NONE;
+    return RID_MRM;
+  }
+  if (ir->o == IR_KNUM) {
+    RegSet avail = as->freeset & ~as->modset & RSET_FPR;
+    lua_assert(allow != RSET_EMPTY);
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
+      as->mrm.ofs = ptr2addr(ir_knum(ir));
+      as->mrm.base = as->mrm.idx = RID_NONE;
+      return RID_MRM;
+    }
+  } else if (ir->o == IR_KINT64) {
+    RegSet avail = as->freeset & ~as->modset & RSET_GPR;
+    lua_assert(allow != RSET_EMPTY);
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
+      as->mrm.ofs = ptr2addr(ir_kint64(ir));
+      as->mrm.base = as->mrm.idx = RID_NONE;
+      return RID_MRM;
+    }
+  } else if (mayfuse(as, ref)) {
+    RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
+    if (ir->o == IR_SLOAD) {
+      if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
+	  noconflict(as, ref, IR_RETF, 0)) {
+	as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
+	as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
+	as->mrm.idx = RID_NONE;
+	return RID_MRM;
+      }
+    } else if (ir->o == IR_FLOAD) {
+      /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
+      if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
+	  noconflict(as, ref, IR_FSTORE, 0)) {
+	asm_fusefref(as, ir, xallow);
+	return RID_MRM;
+      }
+    } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
+      if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
+	asm_fuseahuref(as, ir->op1, xallow);
+	return RID_MRM;
+      }
+    } else if (ir->o == IR_XLOAD) {
+      /* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
+      ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
+      */
+      if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
+	  noconflict(as, ref, IR_XSTORE, 0)) {
+	asm_fusexref(as, ir->op1, xallow);
+	return RID_MRM;
+      }
+    } else if (ir->o == IR_VLOAD) {
+      asm_fuseahuref(as, ir->op1, xallow);
+      return RID_MRM;
+    }
+  }
+  if (!(as->freeset & allow) && !irref_isk(ref) &&
+      (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
+    goto fusespill;
+  return ra_allocref(as, ref, allow);
+}
+
+#if LJ_64
+/* Don't fuse a 32 bit load into a 64 bit operation. */
+static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
+{
+  if (is64 && !irt_is64(IR(ref)->t))
+    return ra_alloc1(as, ref, allow);
+  return asm_fuseload(as, ref, allow);
+}
+#else
+#define asm_fuseloadm(as, ref, allow, is64)  asm_fuseload(as, (ref), (allow))
+#endif
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Count the required number of stack slots for a call. */
+static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t i, nargs = CCI_NARGS(ci);
+  int nslots = 0;
+#if LJ_64
+  if (LJ_ABI_WIN) {
+    nslots = (int)(nargs*2);  /* Only matters for more than four args. */
+  } else {
+    int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+    for (i = 0; i < nargs; i++)
+      if (args[i] && irt_isfp(IR(args[i])->t)) {
+	if (nfpr > 0) nfpr--; else nslots += 2;
+      } else {
+	if (ngpr > 0) ngpr--; else nslots += 2;
+      }
+  }
+#else
+  int ngpr = 0;
+  if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
+    ngpr = 2;
+  else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
+    ngpr = 1;
+  for (i = 0; i < nargs; i++)
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
+    } else {
+      if (ngpr > 0) ngpr--; else nslots++;
+    }
+#endif
+  return nslots;
+}
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_NARGS(ci);
+  int32_t ofs = STACKARG_OFS;
+#if LJ_64
+  uint32_t gprs = REGARG_GPRS;
+  Reg fpr = REGARG_FIRSTFPR;
+#if !LJ_ABI_WIN
+  MCode *patchnfpr = NULL;
+#endif
+#else
+  uint32_t gprs = 0;
+  if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) {
+    if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
+      gprs = (REGARG_GPRS & 31);
+    else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
+      gprs = REGARG_GPRS;
+  }
+#endif
+  if ((void *)ci->func)
+    emit_call(as, ci->func);
+#if LJ_64
+  if ((ci->flags & CCI_VARARG)) {  /* Special handling for vararg calls. */
+#if LJ_ABI_WIN
+    for (n = 0; n < 4 && n < nargs; n++) {
+      IRIns *ir = IR(args[n]);
+      if (irt_isfp(ir->t))  /* Duplicate FPRs in GPRs. */
+	emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n),
+		((gprs >> (n*5)) & 31));  /* Either MOVD or MOVQ. */
+    }
+#else
+    patchnfpr = --as->mcp;  /* Indicate number of used FPRs in register al. */
+    *--as->mcp = XI_MOVrib | RID_EAX;
+#endif
+  }
+#endif
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+    Reg r;
+#if LJ_64 && LJ_ABI_WIN
+    /* Windows/x64 argument registers are strictly positional. */
+    r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
+    fpr++; gprs >>= 5;
+#elif LJ_64
+    /* POSIX/x64 argument registers are used in order of appearance. */
+    if (irt_isfp(ir->t)) {
+      r = fpr <= REGARG_LASTFPR ? fpr++ : 0;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#else
+    if (ref && irt_isfp(ir->t)) {
+      r = 0;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+      if (!ref) continue;
+    }
+#endif
+    if (r) {  /* Argument is in a register. */
+      if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
+#if LJ_64
+	if (ir->o == IR_KINT64)
+	  emit_loadu64(as, r, ir_kint64(ir)->u64);
+	else
+#endif
+	  emit_loadi(as, r, ir->i);
+      } else {
+	lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
+	if (ra_hasreg(ir->r)) {
+	  ra_noweak(as, ir->r);
+	  emit_movrr(as, ir, r, ir->r);
+	} else {
+	  ra_allocref(as, ref, RID2RSET(r));
+	}
+      }
+    } else if (irt_isfp(ir->t)) {  /* FP argument is on stack. */
+      lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref)));  /* No float k. */
+      if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
+	/* Split stores for unaligned FP consts. */
+	emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
+	emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
+      } else {
+	r = ra_alloc1(as, ref, RSET_FPR);
+	emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto,
+		  r, RID_ESP, ofs);
+      }
+      ofs += (LJ_32 && irt_isfloat(ir->t)) ? 4 : 8;
+    } else {  /* Non-FP argument is on stack. */
+      if (LJ_32 && ref < ASMREF_TMP1) {
+	emit_movmroi(as, RID_ESP, ofs, ir->i);
+      } else {
+	r = ra_alloc1(as, ref, RSET_GPR);
+	emit_movtomro(as, REX_64 + r, RID_ESP, ofs);
+      }
+      ofs += sizeof(intptr_t);
+    }
+    checkmclim(as);
+  }
+#if LJ_64 && !LJ_ABI_WIN
+  if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
+#endif
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = (LJ_32 && (ir+1)->o == IR_HIOP);
+  if ((ci->flags & CCI_NOFPRCLOBBER))
+    drop &= ~RSET_FPR;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    if (irt_isfp(ir->t)) {
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+#if LJ_64
+      if ((ci->flags & CCI_CASTU64)) {
+	Reg dest = ir->r;
+	if (ra_hasreg(dest)) {
+	  ra_free(as, dest);
+	  ra_modified(as, dest);
+	  emit_rr(as, XO_MOVD, dest|REX_64, RID_RET);  /* Really MOVQ. */
+	}
+	if (ofs) emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+#else
+      /* Number result is in x87 st0 for x86 calling convention. */
+      Reg dest = ir->r;
+      if (ra_hasreg(dest)) {
+	ra_free(as, dest);
+	ra_modified(as, dest);
+	emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+		  dest, RID_ESP, ofs);
+      }
+      if ((ci->flags & CCI_CASTU64)) {
+	emit_movtomro(as, RID_RETLO, RID_ESP, ofs);
+	emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
+      } else {
+	emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+		  irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+      }
+#endif
+#if LJ_32
+    } else if (hiop) {
+      ra_destpair(as, ir);
+#endif
+    } else {
+      lua_assert(!irt_ispri(ir->t));
+      ra_destreg(as, ir, RID_RET);
+    }
+  } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
+    emit_x87op(as, XI_FPOP);  /* Pop unused result from x87 st0. */
+  }
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+/* Return a constant function pointer or NULL for indirect calls. */
+static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
+{
+#if LJ_32
+  UNUSED(as);
+  if (irref_isk(func))
+    return (void *)irf->i;
+#else
+  if (irref_isk(func)) {
+    MCode *p;
+    if (irf->o == IR_KINT64)
+      p = (MCode *)(void *)ir_k64(irf)->u64;
+    else
+      p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
+    if (p - as->mcp == (int32_t)(p - as->mcp))
+      return p;  /* Call target is still in +-2GB range. */
+    /* Avoid the indirect case of emit_call(). Try to hoist func addr. */
+  }
+#endif
+  return NULL;
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  int32_t spadj = 0;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+#if LJ_32
+  /* Have to readjust stack after non-cdecl calls due to callee cleanup. */
+  if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL)
+    spadj = 4 * asm_count_call_slots(as, &ci, args);
+#endif
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  ci.func = (ASMFunction)asm_callx_func(as, irf, func);
+  if (!(void *)ci.func) {
+    /* Use a (hoistable) non-scratch register for indirect calls. */
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+    Reg r = ra_alloc1(as, func, allow);
+    if (LJ_32) emit_spsub(as, spadj);  /* Above code may cause restores! */
+    emit_rr(as, XO_GROUP5, XOg_CALL, r);
+  } else if (LJ_32) {
+    emit_spsub(as, spadj);
+  }
+  asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guardcc(as, CC_NE);
+  emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guardcc(as, CC_P);
+  asm_guardcc(as, CC_NE);
+  emit_rr(as, XO_UCOMISD, left, tmp);
+  emit_rr(as, XO_CVTSI2SD, tmp, dest);
+  if (!(as->flags & JIT_F_SPLIT_XMM))
+    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_CVTTSD2SI, dest, left);
+  /* Can't fuse since left is needed twice. */
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg tmp = ra_noreg(IR(ir->op1)->r) ?
+	      ra_alloc1(as, ir->op1, RSET_FPR) :
+	      ra_scratch(as, RSET_FPR);
+  Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
+  emit_rr(as, XO_MOVDto, tmp, dest);
+  emit_mrm(as, XO_ADDSD, tmp, right);
+  ra_left(as, tmp, ir->op1);
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lua_assert(irt_type(ir->t) != st);
+  lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64)));  /* Handled by SPLIT. */
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      Reg left = asm_fuseload(as, lref, RSET_FPR);
+      emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left);
+      if (left == dest) return;  /* Avoid the XO_XORPS. */
+    } else if (LJ_32 && st == IRT_U32) {  /* U32 to FP conversion on x86. */
+      /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
+      cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
+      Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      if (irt_isfloat(ir->t))
+	emit_rr(as, XO_CVTSD2SS, dest, dest);
+      emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
+      emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
+      emit_loadn(as, bias, k);
+      emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
+      return;
+    } else {  /* Integer to FP conversion. */
+      Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
+		 ra_alloc1(as, lref, RSET_GPR) :
+		 asm_fuseloadm(as, lref, RSET_GPR, st64);
+      if (LJ_64 && st == IRT_U64) {
+	MCLabel l_end = emit_label(as);
+	const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
+	emit_rma(as, XO_ADDSD, dest, k);  /* Add 2^64 to compensate. */
+	emit_sjcc(as, CC_NS, l_end);
+	emit_rr(as, XO_TEST, left|REX_64, left);  /* Check if u64 >= 2^63. */
+      }
+      emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
+	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
+    }
+    if (!(as->flags & JIT_F_SPLIT_XMM))
+      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      x86Op op = st == IRT_NUM ?
+		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
+		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
+	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
+	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
+					  ra_scratch(as, RSET_FPR);
+	MCLabel l_end = emit_label(as);
+	if (LJ_32)
+	  emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
+	emit_rr(as, op, dest|REX_64, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
+		   LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
+		   LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
+	emit_sjcc(as, CC_NS, l_end);
+	emit_rr(as, XO_TEST, dest|REX_64, dest);  /* Check if dest negative. */
+	emit_rr(as, op, dest|REX_64, tmp);
+	ra_left(as, tmp, lref);
+      } else {
+	Reg left = asm_fuseload(as, lref, RSET_FPR);
+	if (LJ_64 && irt_isu32(ir->t))
+	  emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
+	emit_mrm(as, op,
+		 dest|((LJ_64 &&
+			(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
+		 left);
+      }
+    }
+  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
+    Reg left, dest = ra_dest(as, ir, RSET_GPR);
+    RegSet allow = RSET_GPR;
+    x86Op op;
+    lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+    if (st == IRT_I8) {
+      op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
+    } else if (st == IRT_U8) {
+      op = XO_MOVZXb; allow = RSET_GPR8; dest |= FORCE_REX;
+    } else if (st == IRT_I16) {
+      op = XO_MOVSXw;
+    } else {
+      op = XO_MOVZXw;
+    }
+    left = asm_fuseload(as, lref, allow);
+    /* Add extra MOV if source is already in wrong register. */
+    if (!LJ_64 && left != RID_MRM && !rset_test(allow, left)) {
+      Reg tmp = ra_scratch(as, allow);
+      emit_rr(as, op, dest, tmp);
+      emit_rr(as, XO_MOV, tmp, left);
+    } else {
+      emit_mrm(as, op, dest, left);
+    }
+  } else {  /* 32/64 bit integer conversions. */
+    if (LJ_32) {  /* Only need to handle 32/32 bit no-op (cast) on x86. */
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    } else if (irt_is64(ir->t)) {
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
+	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
+	ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      } else {  /* 32 to 64 bit sign extension. */
+	Reg left = asm_fuseload(as, lref, RSET_GPR);
+	emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
+      }
+    } else {
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      if (st64) {
+	Reg left = asm_fuseload(as, lref, RSET_GPR);
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address.
+	*/
+	emit_mrm(as, XO_MOV, dest, left);
+      } else {  /* 32/32 bit no-op (cast). */
+	ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      }
+    }
+  }
+}
+
+#if LJ_32 && LJ_HASFFI
+/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */
+
+/* 64 bit integer to FP conversion in 32 bit mode. */
+static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
+{
+  Reg hi = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi));
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	      dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+  if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
+    MCLabel l_end = emit_label(as);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_end);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if u64 >= 2^63. */
+  } else {
+    lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
+  }
+  emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
+  /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
+  emit_rmro(as, XO_MOVto, hi, RID_ESP, 4);
+  emit_rmro(as, XO_MOVto, lo, RID_ESP, 0);
+}
+
+/* FP to 64 bit integer conversion in 32 bit mode. */
+static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  Reg lo, hi;
+  lua_assert(st == IRT_NUM || st == IRT_FLOAT);
+  lua_assert(dt == IRT_I64 || dt == IRT_U64);
+  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
+  hi = ra_dest(as, ir, RSET_GPR);
+  lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
+  if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
+  /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */
+  if (!(as->flags & JIT_F_SSE3)) {  /* Set FPU rounding mode to default. */
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4);
+    emit_rmro(as, XO_MOVto, lo, RID_ESP, 4);
+    emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
+  }
+  if (dt == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+    MCLabel l_pop, l_end = emit_label(as);
+    emit_x87op(as, XI_FPOP);
+    l_pop = emit_label(as);
+    emit_sjmp(as, l_end);
+    emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+    if ((as->flags & JIT_F_SSE3))
+      emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+    else
+      emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_pop);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if out-of-range (2^63). */
+  }
+  emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+  if ((as->flags & JIT_F_SSE3)) {  /* Truncation is easy with SSE3. */
+    emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+  } else {  /* Otherwise set FPU rounding mode to truncate before the store. */
+    emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0);
+    emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0);
+    emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0);
+    emit_loadi(as, lo, 0xc00);
+    emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0);
+  }
+  if (dt == IRT_U64)
+    emit_x87op(as, XI_FDUP);
+  emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd,
+	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
+	   asm_fuseload(as, ir->op1, RSET_EMPTY));
+}
+#endif
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+  /* Force a spill slot for the destination register (if any). */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  RegSet drop = RSET_SCRATCH;
+  if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
+    rset_set(drop, ir->r);  /* WIN64 doesn't spill all FPRs. */
+  ra_evictset(as, drop);
+  asm_guardcc(as, CC_E);
+  emit_rr(as, XO_TEST, RID_RET, RID_RET);  /* Test return status. */
+  args[0] = ir->op1;      /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n  */
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or temp slots. */
+  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
+	    RID_ESP, sps_scale(ir->s));
+}
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1);
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (irt_isnum(irl->t)) {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+    args[1] = ASMREF_TMP1;  /* const lua_Number * */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
+	      RID_ESP, ra_spill(as, irl));
+  } else {
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+    args[1] = ir->op1;  /* int32_t k */
+    asm_setupresult(as, ir, ci);  /* GCstr * */
+    asm_gencall(as, ci, args);
+  }
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_fusearef(as, ir, RSET_GPR);
+  if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
+    emit_mrm(as, XO_LEA, dest, RID_MRM);
+  else if (as->mrm.base != dest)
+    emit_rr(as, XO_MOV, dest, as->mrm.base);
+}
+
+/* Merge NE(HREF, niltv) check. */
+static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
+{
+  /* Assumes nothing else generates NE of HREF. */
+  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
+      ra_hasreg(ir->r)) {
+    MCode *p = as->mcp;
+    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
+    /* Ensure no loop branch inversion happened. */
+    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
+      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
+      return p + *(int32_t *)(p-4);  /* Return exit address. */
+    }
+  }
+  return NULL;
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir)
+{
+  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
+  RegSet allow = RSET_GPR;
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = RID_NONE, tmp = RID_NONE;
+  IRIns *irkey = IR(ir->op2);
+  int isk = irref_isk(ir->op2);
+  IRType1 kt = irkey->t;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+
+  if (!isk) {
+    rset_clear(allow, tab);
+    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
+    if (!irt_isstr(kt))
+      tmp = ra_scratch(as, rset_exclude(allow, key));
+  }
+
+  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  l_end = emit_label(as);
+  if (nilexit && ir[1].o == IR_NE) {
+    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
+    nilexit = NULL;
+  } else {
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+  }
+
+  /* Follow hash chain until the end. */
+  l_loop = emit_sjcc_label(as, CC_NZ);
+  emit_rr(as, XO_TEST, dest, dest);
+  emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (nilexit)
+    emit_jcc(as, CC_E, nilexit);
+  else
+    emit_sjcc(as, CC_E, l_end);
+  if (irt_isnum(kt)) {
+    if (isk) {
+      /* Assumes -0.0 is already canonicalized to +0.0. */
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
+		 (int32_t)ir_knum(irkey)->u32.lo);
+      emit_sjcc(as, CC_NE, l_next);
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
+		 (int32_t)ir_knum(irkey)->u32.hi);
+    } else {
+      emit_sjcc(as, CC_P, l_next);
+      emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
+      emit_sjcc(as, CC_AE, l_next);
+      /* The type check avoids NaN penalties and complaints from Valgrind. */
+#if LJ_64
+      emit_u32(as, LJ_TISNUM);
+      emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
+#else
+      emit_i8(as, LJ_TISNUM);
+      emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+#endif
+    }
+#if LJ_64
+  } else if (irt_islightud(kt)) {
+    emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
+#endif
+  } else {
+    if (!irt_ispri(kt)) {
+      lua_assert(irt_isaddr(kt));
+      if (isk)
+	emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
+		   ptr2addr(ir_kgc(irkey)));
+      else
+	emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
+      emit_sjcc(as, CC_NE, l_next);
+    }
+    lua_assert(!irt_isnil(kt));
+    emit_i8(as, irt_toitype(kt));
+    emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+  }
+  emit_sfixup(as, l_loop);
+  checkmclim(as);
+
+  /* Load main position relative to tab->node into dest. */
+  khash = isk ? ir_khash(irkey) : 1;
+  if (khash == 0) {
+    emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
+  } else {
+    emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
+    if ((as->flags & JIT_F_PREFER_IMUL)) {
+      emit_i8(as, sizeof(Node));
+      emit_rr(as, XO_IMULi8, dest, dest);
+    } else {
+      emit_shifti(as, XOg_SHL, dest, 3);
+      emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
+    }
+    if (isk) {
+      emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
+      emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
+    } else if (irt_isstr(kt)) {
+      emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
+      emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
+    } else {  /* Must match with hashrot() in lj_tab.c. */
+      emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
+      emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
+      emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
+      emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
+      emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
+      emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
+      emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
+      emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest);
+      if (irt_isnum(kt)) {
+	emit_rr(as, XO_ARITH(XOg_ADD), dest, dest);
+#if LJ_64
+	emit_shifti(as, XOg_SHR|REX_64, dest, 32);
+	emit_rr(as, XO_MOV, tmp, dest);
+	emit_rr(as, XO_MOVDto, key|REX_64, dest);
+#else
+	emit_rmro(as, XO_MOV, dest, RID_ESP, ra_spill(as, irkey)+4);
+	emit_rr(as, XO_MOVDto, key, tmp);
+#endif
+      } else {
+	emit_rr(as, XO_MOV, tmp, key);
+	emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
+      }
+    }
+  }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+#if !LJ_64
+  MCLabel l_exit;
+#endif
+  lua_assert(ofs % sizeof(Node) == 0);
+  if (ra_hasreg(dest)) {
+    if (ofs != 0) {
+      if (dest == node && !(as->flags & JIT_F_LEA_AGU))
+	emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
+      else
+	emit_rmro(as, XO_LEA, dest, node, ofs);
+    } else if (dest != node) {
+      emit_rr(as, XO_MOV, dest, node);
+    }
+  }
+  asm_guardcc(as, CC_NE);
+#if LJ_64
+  if (!irt_ispri(irkey->t)) {
+    Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
+    emit_rmro(as, XO_CMP, key|REX_64, node,
+	       ofs + (int32_t)offsetof(Node, key.u64));
+    lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
+    /* Assumes -0.0 is already canonicalized to +0.0. */
+    emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
+			  ((uint64_t)irt_toitype(irkey->t) << 32) |
+			  (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
+  } else {
+    lua_assert(!irt_isnil(irkey->t));
+    emit_i8(as, irt_toitype(irkey->t));
+    emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
+	      ofs + (int32_t)offsetof(Node, key.it));
+  }
+#else
+  l_exit = emit_label(as);
+  if (irt_isnum(irkey->t)) {
+    /* Assumes -0.0 is already canonicalized to +0.0. */
+    emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+	       ofs + (int32_t)offsetof(Node, key.u32.lo),
+	       (int32_t)ir_knum(irkey)->u32.lo);
+    emit_sjcc(as, CC_NE, l_exit);
+    emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+	       ofs + (int32_t)offsetof(Node, key.u32.hi),
+	       (int32_t)ir_knum(irkey)->u32.hi);
+  } else {
+    if (!irt_ispri(irkey->t)) {
+      lua_assert(irt_isgcv(irkey->t));
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+		 ofs + (int32_t)offsetof(Node, key.gcr),
+		 ptr2addr(ir_kgc(irkey)));
+      emit_sjcc(as, CC_NE, l_exit);
+    }
+    lua_assert(!irt_isnil(irkey->t));
+    emit_i8(as, irt_toitype(irkey->t));
+    emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
+	      ofs + (int32_t)offsetof(Node, key.it));
+  }
+#endif
+}
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  IRIns *irkey;
+  Reg tmp;
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  irkey = IR(ir->op2);
+  if (irt_isnum(irkey->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ir->op2))
+      emit_loada(as, tmp, ir_knum(irkey));
+    else
+      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
+  } else {
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ir->op2)) {
+      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
+      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
+    } else if (!irt_ispri(irkey->t)) {
+      emit_movmroi(as, tmp, 0, irkey->i);
+    }
+    if (!(LJ_64 && irt_islightud(irkey->t)))
+      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
+    emit_loada(as, tmp, &J2G(as->J)->tmptv);
+  }
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+  /* NYI: Check that UREFO is still open and not aliasing a slot. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_rma(as, XO_MOV, dest, v);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, 1);
+      emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
+    } else {
+      emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
+    }
+    emit_rmro(as, XO_MOV, uv, func,
+	      (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_fusefref(as, ir, RSET_GPR);
+  emit_mrm(as, XO_LEA, dest, RID_MRM);
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_fusestrref(as, ir, RSET_GPR);
+  if (as->mrm.base == RID_NONE)
+    emit_loadi(as, dest, as->mrm.ofs);
+  else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
+    emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
+  else
+    emit_mrm(as, XO_LEA, dest, RID_MRM);
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static void asm_fxload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  x86Op xo;
+  if (ir->o == IR_FLOAD)
+    asm_fusefref(as, ir, RSET_GPR);
+  else
+    asm_fusexref(as, ir->op1, RSET_GPR);
+  /* ir->op2 is ignored -- unaligned loads are ok on x86. */
+  switch (irt_type(ir->t)) {
+  case IRT_I8: xo = XO_MOVSXb; break;
+  case IRT_U8: xo = XO_MOVZXb; break;
+  case IRT_I16: xo = XO_MOVSXw; break;
+  case IRT_U16: xo = XO_MOVZXw; break;
+  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_FLOAT: xo = XO_MOVSS; break;
+  default:
+    if (LJ_64 && irt_is64(ir->t))
+      dest |= REX_64;
+    else
+      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+    xo = XO_MOV;
+    break;
+  }
+  emit_mrm(as, xo, dest, RID_MRM);
+}
+
+static void asm_fxstore(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg src = RID_NONE, osrc = RID_NONE;
+  int32_t k = 0;
+  if (ir->r == RID_SINK)
+    return;
+  /* The IRT_I16/IRT_U16 stores should never be simplified for constant
+  ** values since mov word [mem], imm16 has a length-changing prefix.
+  */
+  if (irt_isi16(ir->t) || irt_isu16(ir->t) || irt_isfp(ir->t) ||
+      !asm_isk32(as, ir->op2, &k)) {
+    RegSet allow8 = irt_isfp(ir->t) ? RSET_FPR :
+		    (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
+    src = osrc = ra_alloc1(as, ir->op2, allow8);
+    if (!LJ_64 && !rset_test(allow8, src)) {  /* Already in wrong register. */
+      rset_clear(allow, osrc);
+      src = ra_scratch(as, allow8);
+    }
+    rset_clear(allow, src);
+  }
+  if (ir->o == IR_FSTORE) {
+    asm_fusefref(as, IR(ir->op1), allow);
+  } else {
+    asm_fusexref(as, ir->op1, allow);
+    if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4;
+  }
+  if (ra_hasreg(src)) {
+    x86Op xo;
+    switch (irt_type(ir->t)) {
+    case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
+    case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
+    case IRT_NUM: xo = XO_MOVSDto; break;
+    case IRT_FLOAT: xo = XO_MOVSSto; break;
+#if LJ_64
+    case IRT_LIGHTUD: lua_assert(0);  /* NYI: mask 64 bit lightuserdata. */
+#endif
+    default:
+      if (LJ_64 && irt_is64(ir->t))
+	src |= REX_64;
+      else
+	lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+      xo = XO_MOVto;
+      break;
+    }
+    emit_mrm(as, xo, src, RID_MRM);
+    if (!LJ_64 && src != osrc) {
+      ra_noweak(as, osrc);
+      emit_rr(as, XO_MOV, src, osrc);
+    }
+  } else {
+    if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
+      emit_i8(as, k);
+      emit_mrm(as, XO_MOVmib, 0, RID_MRM);
+    } else {
+      lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
+		 irt_isaddr(ir->t));
+      emit_i32(as, k);
+      emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
+    }
+  }
+}
+
+#if LJ_64
+static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
+{
+  if (ra_used(ir) || typecheck) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (typecheck) {
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, -2);
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rr(as, XO_MOV, tmp|REX_64, dest);
+    }
+    return dest;
+  } else {
+    return RID_NONE;
+  }
+}
+#endif
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+	     (LJ_DUALNUM && irt_isint(ir->t)));
+#if LJ_64
+  if (irt_islightud(ir->t)) {
+    Reg dest = asm_load_lightud64(as, ir, 1);
+    if (ra_hasreg(dest)) {
+      asm_fuseahuref(as, ir->op1, RSET_GPR);
+      emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+    }
+    return;
+  } else
+#endif
+  if (ra_used(ir)) {
+    RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+    Reg dest = ra_dest(as, ir, allow);
+    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+  } else {
+    asm_fuseahuref(as, ir->op1, RSET_GPR);
+  }
+  /* Always do the type check, even if the load result is unused. */
+  as->mrm.ofs += 4;
+  asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
+  if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
+    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+    emit_u32(as, LJ_TISNUM);
+    emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+  } else {
+    emit_i8(as, irt_toitype(ir->t));
+    emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
+  }
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+  if (ir->r == RID_SINK)
+    return;
+  if (irt_isnum(ir->t)) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
+    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    emit_mrm(as, XO_MOVSDto, src, RID_MRM);
+#if LJ_64
+  } else if (irt_islightud(ir->t)) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
+    emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+#endif
+  } else {
+    IRIns *irr = IR(ir->op2);
+    RegSet allow = RSET_GPR;
+    Reg src = RID_NONE;
+    if (!irref_isk(ir->op2)) {
+      src = ra_alloc1(as, ir->op2, allow);
+      rset_clear(allow, src);
+    }
+    asm_fuseahuref(as, ir->op1, allow);
+    if (ra_hasreg(src)) {
+      emit_mrm(as, XO_MOVto, src, RID_MRM);
+    } else if (!irt_ispri(irr->t)) {
+      lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
+      emit_i32(as, irr->i);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+    }
+    as->mrm.ofs += 4;
+    emit_i32(as, (int32_t)irt_toitype(ir->t));
+    emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+  IRType1 t = ir->t;
+  Reg base;
+  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
+  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lua_assert(LJ_DUALNUM ||
+	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    Reg left = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
+    base = ra_alloc1(as, REF_BASE, RSET_GPR);
+    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+#if LJ_64
+  } else if (irt_islightud(t)) {
+    Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
+    if (ra_hasreg(dest)) {
+      base = ra_alloc1(as, REF_BASE, RSET_GPR);
+      emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+    }
+    return;
+#endif
+  } else if (ra_used(ir)) {
+    RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
+    Reg dest = ra_dest(as, ir, allow);
+    base = ra_alloc1(as, REF_BASE, RSET_GPR);
+    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+    if ((ir->op2 & IRSLOAD_CONVERT)) {
+      t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
+    } else if (irt_isnum(t)) {
+      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+    } else {
+      emit_rmro(as, XO_MOV, dest, base, ofs);
+    }
+  } else {
+    if (!(ir->op2 & IRSLOAD_TYPECHECK))
+      return;  /* No type check: avoid base alloc. */
+    base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  }
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    /* Need type check, even if the load result is unused. */
+    asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
+    if (LJ_64 && irt_type(t) >= IRT_NUM) {
+      lua_assert(irt_isinteger(t) || irt_isnum(t));
+      emit_u32(as, LJ_TISNUM);
+      emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+    } else {
+      emit_i8(as, irt_toitype(t));
+      emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
+    }
+  }
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
+  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
+	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[2];
+  lua_assert(sz != CTSIZE_INVALID);
+
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCcdata * */
+
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+#if LJ_64
+    Reg r64 = sz == 8 ? REX_64 : 0;
+    if (irref_isk(ir->op2)) {
+      IRIns *irk = IR(ir->op2);
+      uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
+					 (uint64_t)(uint32_t)irk->i;
+      if (sz == 4 || checki32((int64_t)k)) {
+	emit_i32(as, (int32_t)k);
+	emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
+      } else {
+	emit_movtomro(as, RID_ECX + r64, RID_RET, sizeof(GCcdata));
+	emit_loadu64(as, RID_ECX, k);
+      }
+    } else {
+      Reg r = ra_alloc1(as, ir->op2, allow);
+      emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata));
+    }
+#else
+    int32_t ofs = sizeof(GCcdata);
+    if (sz == 8) {
+      ofs += 4; ir++;
+      lua_assert(ir->o == IR_HIOP);
+    }
+    do {
+      if (irref_isk(ir->op2)) {
+	emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i);
+      } else {
+	Reg r = ra_alloc1(as, ir->op2, allow);
+	emit_movtomro(as, r, RID_RET, ofs);
+	rset_clear(allow, r);
+      }
+      if (ofs == sizeof(GCcdata)) break;
+      ofs -= 4; ir--;
+    } while (1);
+#endif
+    lua_assert(sz == 4 || sz == 8);
+  }
+
+  /* Combine initialization of marked, gct and ctypeid. */
+  emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
+  emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
+	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+  emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
+  emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
+
+  asm_gencall(as, ci, args);
+  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
+}
+#else
+#define asm_cnew(as, ir)	((void)0)
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  MCLabel l_end = emit_label(as);
+  emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
+  emit_setgl(as, tab, gc.grayagain);
+  emit_getgl(as, tmp, gc.grayagain);
+  emit_i8(as, ~LJ_GC_BLACK);
+  emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked));
+  emit_sjcc(as, CC_Z, l_end);
+  emit_i8(as, LJ_GC_BLACK);
+  emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg obj;
+  /* No need for other object barriers (yet). */
+  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
+  obj = IR(ir->op1)->r;
+  emit_sjcc(as, CC_Z, l_end);
+  emit_i8(as, LJ_GC_WHITES);
+  if (irref_isk(ir->op2)) {
+    GCobj *vp = ir_kgc(IR(ir->op2));
+    emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
+  } else {
+    Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
+    emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
+  }
+  emit_sjcc(as, CC_Z, l_end);
+  emit_i8(as, LJ_GC_BLACK);
+  emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
+	    (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+}
+
+/* -- FP/int arithmetic and logic operations ------------------------------ */
+
+/* Load reference onto x87 stack. Force a spill to memory if needed. */
+static void asm_x87load(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_KNUM) {
+    cTValue *tv = ir_knum(ir);
+    if (tvispzero(tv))  /* Use fldz only for +0. */
+      emit_x87op(as, XI_FLDZ);
+    else if (tvispone(tv))
+      emit_x87op(as, XI_FLD1);
+    else
+      emit_rma(as, XO_FLDq, XOg_FLDq, tv);
+  } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && !ra_used(ir) &&
+	     !irref_isk(ir->op1) && mayfuse(as, ir->op1)) {
+    IRIns *iri = IR(ir->op1);
+    emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri));
+  } else {
+    emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY));
+  }
+}
+
+/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
+static int fpmjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      /* The modified regs must match with the *.dasc implementation. */
+      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
+      IRIns *irx;
+      if (ra_hasreg(ir->r))
+	rset_clear(drop, ir->r);  /* Dest reg handled below. */
+      ra_evictset(as, drop);
+      ra_destreg(as, ir, RID_XMM0);
+      emit_call(as, lj_vm_pow_sse);
+      irx = IR(irpp->op1);
+      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
+	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
+      ra_left(as, RID_XMM0, irpp->op1);
+      ra_left(as, RID_XMM1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  if (fpm == IRFPM_SQRT) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
+    emit_mrm(as, XO_SQRTSD, dest, left);
+  } else if (fpm <= IRFPM_TRUNC) {
+    if (as->flags & JIT_F_SSE4_1) {  /* SSE4.1 has a rounding instruction. */
+      Reg dest = ra_dest(as, ir, RSET_FPR);
+      Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
+      /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op.
+      ** Let's pretend it's a 3-byte opcode, and compensate afterwards.
+      ** This is atrocious, but the alternatives are much worse.
+      */
+      /* Round down/up/trunc == 1001/1010/1011. */
+      emit_i8(as, 0x09 + fpm);
+      emit_mrm(as, XO_ROUNDSD, dest, left);
+      if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
+	as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f;  /* Swap 0F and REX. */
+      }
+      *--as->mcp = 0x66;  /* 1st byte of ROUNDSD opcode. */
+    } else {  /* Call helper functions for SSE2 variant. */
+      /* The modified regs must match with the *.dasc implementation. */
+      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+      if (ra_hasreg(ir->r))
+	rset_clear(drop, ir->r);  /* Dest reg handled below. */
+      ra_evictset(as, drop);
+      ra_destreg(as, ir, RID_XMM0);
+      emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse :
+		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
+      ra_left(as, RID_XMM0, ir->op1);
+    }
+  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+    /* Rejoined to pow(). */
+  } else {  /* Handle x87 ops. */
+    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+    Reg dest = ir->r;
+    if (ra_hasreg(dest)) {
+      ra_free(as, dest);
+      ra_modified(as, dest);
+      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+    }
+    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+    switch (fpm) {  /* st0 = lj_vm_*(st0) */
+    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
+    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
+    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
+    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
+    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
+    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
+      /* Note: the use of fyl2xp1 would be pointless here. When computing
+      ** log(1.0+eps) the precision is already lost after 1.0 is added.
+      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
+      */
+      emit_x87op(as, XI_FYL2X); break;
+    case IRFPM_OTHER:
+      switch (ir->o) {
+      case IR_ATAN2:
+	emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
+      case IR_LDEXP:
+	emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
+      default: lua_assert(0); break;
+      }
+      break;
+    default: lua_assert(0); break;
+    }
+    asm_x87load(as, ir->op1);
+    switch (fpm) {
+    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
+    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
+    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
+    case IRFPM_OTHER:
+      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
+      break;
+    default: break;
+    }
+  }
+}
+
+static void asm_fppowi(ASMState *as, IRIns *ir)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_XMM0);
+  emit_call(as, lj_vm_powi_sse);
+  ra_left(as, RID_XMM0, ir->op1);
+  ra_left(as, RID_EAX, ir->op2);
+}
+
+#if LJ_64 && LJ_HASFFI
+static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+#endif
+
+static void asm_intmod(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_swapops(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1);
+  IRIns *irr = IR(ir->op2);
+  lua_assert(ra_noreg(irr->r));
+  if (!irm_iscomm(lj_ir_mode[ir->o]))
+    return 0;  /* Can't swap non-commutative operations. */
+  if (irref_isk(ir->op2))
+    return 0;  /* Don't swap constants to the left. */
+  if (ra_hasreg(irl->r))
+    return 1;  /* Swap if left already has a register. */
+  if (ra_samehint(ir->r, irr->r))
+    return 1;  /* Swap if dest and right have matching hints. */
+  if (as->curins > as->loopref) {  /* In variant part? */
+    if (ir->op2 < as->loopref && !irt_isphi(irr->t))
+      return 0;  /* Keep invariants on the right. */
+    if (ir->op1 < as->loopref && !irt_isphi(irl->t))
+      return 1;  /* Swap invariants to the right. */
+  }
+  if (opisfusableload(irl->o))
+    return 1;  /* Swap fusable loads to the right. */
+  return 0;  /* Otherwise don't swap. */
+}
+
+static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
+{
+  IRRef lref = ir->op1;
+  IRRef rref = ir->op2;
+  RegSet allow = RSET_FPR;
+  Reg dest;
+  Reg right = IR(rref)->r;
+  if (ra_hasreg(right)) {
+    rset_clear(allow, right);
+    ra_noweak(as, right);
+  }
+  dest = ra_dest(as, ir, allow);
+  if (lref == rref) {
+    right = dest;
+  } else if (ra_noreg(right)) {
+    if (asm_swapops(as, ir)) {
+      IRRef tmp = lref; lref = rref; rref = tmp;
+    }
+    right = asm_fuseload(as, rref, rset_clear(allow, dest));
+  }
+  emit_mrm(as, xo, dest, right);
+  ra_left(as, dest, lref);
+}
+
+static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
+{
+  IRRef lref = ir->op1;
+  IRRef rref = ir->op2;
+  RegSet allow = RSET_GPR;
+  Reg dest, right;
+  int32_t k = 0;
+  if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
+    MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
+    if ((p[1] & 15) < 14) {
+      if ((p[1] & 15) >= 12) p[1] -= 4;  /* L <->S, NL <-> NS */
+      as->flagmcp = NULL;
+      as->mcp = p;
+    }  /* else: cannot transform LE/NLE to cc without use of OF. */
+  }
+  right = IR(rref)->r;
+  if (ra_hasreg(right)) {
+    rset_clear(allow, right);
+    ra_noweak(as, right);
+  }
+  dest = ra_dest(as, ir, allow);
+  if (lref == rref) {
+    right = dest;
+  } else if (ra_noreg(right) && !asm_isk32(as, rref, &k)) {
+    if (asm_swapops(as, ir)) {
+      IRRef tmp = lref; lref = rref; rref = tmp;
+    }
+    right = asm_fuseloadm(as, rref, rset_clear(allow, dest), irt_is64(ir->t));
+  }
+  if (irt_isguard(ir->t))  /* For IR_ADDOV etc. */
+    asm_guardcc(as, CC_O);
+  if (xa != XOg_X_IMUL) {
+    if (ra_hasreg(right))
+      emit_mrm(as, XO_ARITH(xa), REX_64IR(ir, dest), right);
+    else
+      emit_gri(as, XG_ARITHi(xa), REX_64IR(ir, dest), k);
+  } else if (ra_hasreg(right)) {  /* IMUL r, mrm. */
+    emit_mrm(as, XO_IMUL, REX_64IR(ir, dest), right);
+  } else {  /* IMUL r, r, k. */
+    /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
+    Reg left = asm_fuseloadm(as, lref, RSET_GPR, irt_is64(ir->t));
+    x86Op xo;
+    if (checki8(k)) { emit_i8(as, k); xo = XO_IMULi8;
+    } else { emit_i32(as, k); xo = XO_IMULi; }
+    emit_mrm(as, xo, REX_64IR(ir, dest), left);
+    return;
+  }
+  ra_left(as, dest, lref);
+}
+
+/* LEA is really a 4-operand ADD with an independent destination register,
+** up to two source registers and an immediate. One register can be scaled
+** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
+** instructions.
+**
+** Currently only a few common cases are supported:
+** - 3-operand ADD:    y = a+b; y = a+k   with a and b already allocated
+** - Left ADD fusion:  y = (a+b)+k; y = (a+k)+b
+** - Right ADD fusion: y = a+(b+k)
+** The ommited variants have already been reduced by FOLD.
+**
+** There are more fusion opportunities, like gathering shifts or joining
+** common references. But these are probably not worth the trouble, since
+** array indexing is not decomposed and already makes use of all fields
+** of the ModRM operand.
+*/
+static int asm_lea(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1);
+  IRIns *irr = IR(ir->op2);
+  RegSet allow = RSET_GPR;
+  Reg dest;
+  as->mrm.base = as->mrm.idx = RID_NONE;
+  as->mrm.scale = XM_SCALE1;
+  as->mrm.ofs = 0;
+  if (ra_hasreg(irl->r)) {
+    rset_clear(allow, irl->r);
+    ra_noweak(as, irl->r);
+    as->mrm.base = irl->r;
+    if (irref_isk(ir->op2) || ra_hasreg(irr->r)) {
+      /* The PHI renaming logic does a better job in some cases. */
+      if (ra_hasreg(ir->r) &&
+	  ((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) ||
+	   (irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2)))
+	return 0;
+      if (irref_isk(ir->op2)) {
+	as->mrm.ofs = irr->i;
+      } else {
+	rset_clear(allow, irr->r);
+	ra_noweak(as, irr->r);
+	as->mrm.idx = irr->r;
+      }
+    } else if (irr->o == IR_ADD && mayfuse(as, ir->op2) &&
+	       irref_isk(irr->op2)) {
+      Reg idx = ra_alloc1(as, irr->op1, allow);
+      rset_clear(allow, idx);
+      as->mrm.idx = (uint8_t)idx;
+      as->mrm.ofs = IR(irr->op2)->i;
+    } else {
+      return 0;
+    }
+  } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) &&
+	     (irref_isk(ir->op2) || irref_isk(irl->op2))) {
+    Reg idx, base = ra_alloc1(as, irl->op1, allow);
+    rset_clear(allow, base);
+    as->mrm.base = (uint8_t)base;
+    if (irref_isk(ir->op2)) {
+      as->mrm.ofs = irr->i;
+      idx = ra_alloc1(as, irl->op2, allow);
+    } else {
+      as->mrm.ofs = IR(irl->op2)->i;
+      idx = ra_alloc1(as, ir->op2, allow);
+    }
+    rset_clear(allow, idx);
+    as->mrm.idx = (uint8_t)idx;
+  } else {
+    return 0;
+  }
+  dest = ra_dest(as, ir, allow);
+  emit_mrm(as, XO_LEA, dest, RID_MRM);
+  return 1;  /* Success. */
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_ADDSD);
+  else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
+	   irt_is64(ir->t) || !asm_lea(as, ir))
+    asm_intarith(as, ir, XOg_ADD);
+}
+
+static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  emit_rr(as, XO_GROUP3, REX_64IR(ir, xg), dest);
+  ra_left(as, dest, ir->op1);
+}
+
+static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+{
+  Reg right, dest = ra_dest(as, ir, RSET_GPR);
+  IRRef lref = ir->op1, rref = ir->op2;
+  if (irref_isk(rref)) { lref = rref; rref = ir->op1; }
+  right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, dest));
+  emit_rr(as, XO_CMOV + (cc<<24), REX_64IR(ir, dest), right);
+  emit_rr(as, XO_CMP, REX_64IR(ir, dest), right);
+  ra_left(as, dest, lref);
+}
+
+static void asm_bitswap(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
+		    REX_64IR(ir, 0), dest, 0, as->mcp, 1);
+  ra_left(as, dest, ir->op1);
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+{
+  IRRef rref = ir->op2;
+  IRIns *irr = IR(rref);
+  Reg dest;
+  if (irref_isk(rref)) {  /* Constant shifts. */
+    int shift;
+    dest = ra_dest(as, ir, RSET_GPR);
+    shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
+    switch (shift) {
+    case 0: break;
+    case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
+    default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
+    }
+  } else {  /* Variable shifts implicitly use register cl (i.e. ecx). */
+    Reg right;
+    dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
+    if (dest == RID_ECX) {
+      dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
+      emit_rr(as, XO_MOV, RID_ECX, dest);
+    }
+    right = irr->r;
+    if (ra_noreg(right))
+      right = ra_allocref(as, rref, RID2RSET(RID_ECX));
+    else if (right != RID_ECX)
+      ra_scratch(as, RID2RSET(RID_ECX));
+    emit_rr(as, XO_SHIFTcl, REX_64IR(ir, xs), dest);
+    ra_noweak(as, right);
+    if (right != RID_ECX)
+      emit_rr(as, XO_MOV, RID_ECX, right);
+  }
+  ra_left(as, dest, ir->op1);
+  /*
+  ** Note: avoid using the flags resulting from a shift or rotate!
+  ** All of them cause a partial flag stall, except for r,1 shifts
+  ** (but not rotates). And a shift count of 0 leaves the flags unmodified.
+  */
+}
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Virtual flags for unordered FP comparisons. */
+#define VCC_U	0x1000		/* Unordered. */
+#define VCC_P	0x2000		/* Needs extra CC_P branch. */
+#define VCC_S	0x4000		/* Swap avoids CC_P branch. */
+#define VCC_PS	(VCC_P|VCC_S)
+
+/* Map of comparisons to flags. ORDER IR. */
+#define COMPFLAGS(ci, cin, cu, cf)	((ci)+((cu)<<4)+((cin)<<8)+(cf))
+static const uint16_t asm_compmap[IR_ABC+1] = {
+  /*                 signed non-eq unsigned flags */
+  /* LT  */ COMPFLAGS(CC_GE, CC_G,  CC_AE, VCC_PS),
+  /* GE  */ COMPFLAGS(CC_L,  CC_L,  CC_B,  0),
+  /* LE  */ COMPFLAGS(CC_G,  CC_G,  CC_A,  VCC_PS),
+  /* GT  */ COMPFLAGS(CC_LE, CC_L,  CC_BE, 0),
+  /* ULT */ COMPFLAGS(CC_AE, CC_A,  CC_AE, VCC_U),
+  /* UGE */ COMPFLAGS(CC_B,  CC_B,  CC_B,  VCC_U|VCC_PS),
+  /* ULE */ COMPFLAGS(CC_A,  CC_A,  CC_A,  VCC_U),
+  /* UGT */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS),
+  /* EQ  */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P),
+  /* NE  */ COMPFLAGS(CC_E,  CC_E,  CC_E,  VCC_U|VCC_P),
+  /* ABC */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS)  /* Same as UGT. */
+};
+
+/* FP and integer comparisons. */
+static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+{
+  if (irt_isnum(ir->t)) {
+    IRRef lref = ir->op1;
+    IRRef rref = ir->op2;
+    Reg left, right;
+    MCLabel l_around;
+    /*
+    ** An extra CC_P branch is required to preserve ordered/unordered
+    ** semantics for FP comparisons. This can be avoided by swapping
+    ** the operands and inverting the condition (except for EQ and UNE).
+    ** So always try to swap if possible.
+    **
+    ** Another option would be to swap operands to achieve better memory
+    ** operand fusion. But it's unlikely that this outweighs the cost
+    ** of the extra branches.
+    */
+    if (cc & VCC_S) {  /* Swap? */
+      IRRef tmp = lref; lref = rref; rref = tmp;
+      cc ^= (VCC_PS|(5<<4));  /* A <-> B, AE <-> BE, PS <-> none */
+    }
+    left = ra_alloc1(as, lref, RSET_FPR);
+    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
+    l_around = emit_label(as);
+    asm_guardcc(as, cc >> 4);
+    if (cc & VCC_P) {  /* Extra CC_P branch required? */
+      if (!(cc & VCC_U)) {
+	asm_guardcc(as, CC_P);  /* Branch to exit for ordered comparisons. */
+      } else if (l_around != as->invmcp) {
+	emit_sjcc(as, CC_P, l_around);  /* Branch around for unordered. */
+      } else {
+	/* Patched to mcloop by asm_loop_fixup. */
+	as->loopinv = 2;
+	if (as->realign)
+	  emit_sjcc(as, CC_P, as->mcp);
+	else
+	  emit_jcc(as, CC_P, as->mcp);
+      }
+    }
+    emit_mrm(as, XO_UCOMISD, left, right);
+  } else {
+    IRRef lref = ir->op1, rref = ir->op2;
+    IROp leftop = (IROp)(IR(lref)->o);
+    Reg r64 = REX_64IR(ir, 0);
+    int32_t imm = 0;
+    lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
+	       irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
+    /* Swap constants (only for ABC) and fusable loads to the right. */
+    if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
+      if ((cc & 0xc) == 0xc) cc ^= 0x53;  /* L <-> G, LE <-> GE */
+      else if ((cc & 0xa) == 0x2) cc ^= 0x55;  /* A <-> B, AE <-> BE */
+      lref = ir->op2; rref = ir->op1;
+    }
+    if (asm_isk32(as, rref, &imm)) {
+      IRIns *irl = IR(lref);
+      /* Check wether we can use test ins. Not for unsigned, since CF=0. */
+      int usetest = (imm == 0 && (cc & 0xa) != 0x2);
+      if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
+	/* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
+	Reg right, left = RID_NONE;
+	RegSet allow = RSET_GPR;
+	if (!asm_isk32(as, irl->op2, &imm)) {
+	  left = ra_alloc1(as, irl->op2, allow);
+	  rset_clear(allow, left);
+	} else {  /* Try to Fuse IRT_I8/IRT_U8 loads, too. See below. */
+	  IRIns *irll = IR(irl->op1);
+	  if (opisfusableload((IROp)irll->o) &&
+	      (irt_isi8(irll->t) || irt_isu8(irll->t))) {
+	    IRType1 origt = irll->t;  /* Temporarily flip types. */
+	    irll->t.irt = (irll->t.irt & ~IRT_TYPE) | IRT_INT;
+	    as->curins--;  /* Skip to BAND to avoid failing in noconflict(). */
+	    right = asm_fuseload(as, irl->op1, RSET_GPR);
+	    as->curins++;
+	    irll->t = origt;
+	    if (right != RID_MRM) goto test_nofuse;
+	    /* Fusion succeeded, emit test byte mrm, imm8. */
+	    asm_guardcc(as, cc);
+	    emit_i8(as, (imm & 0xff));
+	    emit_mrm(as, XO_GROUP3b, XOg_TEST, RID_MRM);
+	    return;
+	  }
+	}
+	as->curins--;  /* Skip to BAND to avoid failing in noconflict(). */
+	right = asm_fuseloadm(as, irl->op1, allow, r64);
+	as->curins++;  /* Undo the above. */
+      test_nofuse:
+	asm_guardcc(as, cc);
+	if (ra_noreg(left)) {
+	  emit_i32(as, imm);
+	  emit_mrm(as, XO_GROUP3, r64 + XOg_TEST, right);
+	} else {
+	  emit_mrm(as, XO_TEST, r64 + left, right);
+	}
+      } else {
+	Reg left;
+	if (opisfusableload((IROp)irl->o) &&
+	    ((irt_isu8(irl->t) && checku8(imm)) ||
+	     ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
+	     (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
+	  /* Only the IRT_INT case is fused by asm_fuseload.
+	  ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
+	  ** are handled here.
+	  ** Note that cmp word [mem], imm16 should not be generated,
+	  ** since it has a length-changing prefix. Compares of a word
+	  ** against a sign-extended imm8 are ok, however.
+	  */
+	  IRType1 origt = irl->t;  /* Temporarily flip types. */
+	  irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
+	  left = asm_fuseload(as, lref, RSET_GPR);
+	  irl->t = origt;
+	  if (left == RID_MRM) {  /* Fusion succeeded? */
+	    if (irt_isu8(irl->t) || irt_isu16(irl->t))
+	      cc >>= 4;  /* Need unsigned compare. */
+	    asm_guardcc(as, cc);
+	    emit_i8(as, imm);
+	    emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
+			 XO_ARITHib : XO_ARITHiw8, r64 + XOg_CMP, RID_MRM);
+	    return;
+	  }  /* Otherwise handle register case as usual. */
+	} else {
+	  left = asm_fuseloadm(as, lref,
+			       irt_isu8(ir->t) ? RSET_GPR8 : RSET_GPR, r64);
+	}
+	asm_guardcc(as, cc);
+	if (usetest && left != RID_MRM) {
+	  /* Use test r,r instead of cmp r,0. */
+	  x86Op xo = XO_TEST;
+	  if (irt_isu8(ir->t)) {
+	    lua_assert(ir->o == IR_EQ || ir->o == IR_NE);
+	    xo = XO_TESTb;
+	    if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
+	      if (LJ_64) {
+		left |= FORCE_REX;
+	      } else {
+		emit_i32(as, 0xff);
+		emit_mrm(as, XO_GROUP3, XOg_TEST, left);
+		return;
+	      }
+	    }
+	  }
+	  emit_rr(as, xo, r64 + left, left);
+	  if (irl+1 == ir)  /* Referencing previous ins? */
+	    as->flagmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
+	} else {
+	  emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
+	}
+      }
+    } else {
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      Reg right = asm_fuseloadm(as, rref, rset_exclude(RSET_GPR, left), r64);
+      asm_guardcc(as, cc);
+      emit_mrm(as, XO_CMP, r64 + left, right);
+    }
+  }
+}
+
+#if LJ_32 && LJ_HASFFI
+/* 64 bit integer comparisons in 32 bit mode. */
+static void asm_comp_int64(ASMState *as, IRIns *ir)
+{
+  uint32_t cc = asm_compmap[(ir-1)->o];
+  RegSet allow = RSET_GPR;
+  Reg lefthi = RID_NONE, leftlo = RID_NONE;
+  Reg righthi = RID_NONE, rightlo = RID_NONE;
+  MCLabel l_around;
+  x86ModRM mrm;
+
+  as->curins--;  /* Skip loword ins. Avoids failing in noconflict(), too. */
+
+  /* Allocate/fuse hiword operands. */
+  if (irref_isk(ir->op2)) {
+    lefthi = asm_fuseload(as, ir->op1, allow);
+  } else {
+    lefthi = ra_alloc1(as, ir->op1, allow);
+    rset_clear(allow, lefthi);
+    righthi = asm_fuseload(as, ir->op2, allow);
+    if (righthi == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, righthi);
+    }
+  }
+  mrm = as->mrm;  /* Save state for hiword instruction. */
+
+  /* Allocate/fuse loword operands. */
+  if (irref_isk((ir-1)->op2)) {
+    leftlo = asm_fuseload(as, (ir-1)->op1, allow);
+  } else {
+    leftlo = ra_alloc1(as, (ir-1)->op1, allow);
+    rset_clear(allow, leftlo);
+    rightlo = asm_fuseload(as, (ir-1)->op2, allow);
+  }
+
+  /* All register allocations must be performed _before_ this point. */
+  l_around = emit_label(as);
+  as->invmcp = as->flagmcp = NULL;  /* Cannot use these optimizations. */
+
+  /* Loword comparison and branch. */
+  asm_guardcc(as, cc >> 4);  /* Always use unsigned compare for loword. */
+  if (ra_noreg(rightlo)) {
+    int32_t imm = IR((ir-1)->op2)->i;
+    if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM)
+      emit_rr(as, XO_TEST, leftlo, leftlo);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm);
+  } else {
+    emit_mrm(as, XO_CMP, leftlo, rightlo);
+  }
+
+  /* Hiword comparison and branches. */
+  if ((cc & 15) != CC_NE)
+    emit_sjcc(as, CC_NE, l_around);  /* Hiword unequal: skip loword compare. */
+  if ((cc & 15) != CC_E)
+    asm_guardcc(as, cc >> 8);  /* Hiword compare without equality check. */
+  as->mrm = mrm;  /* Restore state. */
+  if (ra_noreg(righthi)) {
+    int32_t imm = IR(ir->op2)->i;
+    if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM)
+      emit_rr(as, XO_TEST, lefthi, lefthi);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm);
+  } else {
+    emit_mrm(as, XO_CMP, lefthi, righthi);
+  }
+}
+#endif
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_32 && LJ_HASFFI
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
+    if (usehi || uselo) {
+      if (irt_isfp(ir->t))
+	asm_conv_fp_int64(as, ir);
+      else
+	asm_conv_int64_fp(as, ir);
+    }
+    as->curins--;  /* Always skip the CONV. */
+    return;
+  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
+    asm_comp_int64(as, ir);
+    return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    if ((ir-1)->r != RID_SINK)
+      asm_fxstore(as, ir);
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_ADD:
+    as->flagmcp = NULL;
+    as->curins--;
+    asm_intarith(as, ir, XOg_ADC);
+    asm_intarith(as, ir-1, XOg_ADD);
+    break;
+  case IR_SUB:
+    as->flagmcp = NULL;
+    as->curins--;
+    asm_intarith(as, ir, XOg_SBB);
+    asm_intarith(as, ir-1, XOg_SUB);
+    break;
+  case IR_NEG: {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    emit_rr(as, XO_GROUP3, XOg_NEG, dest);
+    emit_i8(as, 0);
+    emit_rr(as, XO_ARITHi8, XOg_ADC, dest);
+    ra_left(as, dest, ir->op1);
+    as->curins--;
+    asm_neg_not(as, ir-1, XOg_NEG);
+    break;
+    }
+  case IR_CALLN:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by CNEWI itself. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on x64 or without FFI. */
+#endif
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  /* Try to get an unused temp. register, otherwise spill/restore eax. */
+  Reg pbase = irp ? irp->r : RID_BASE;
+  Reg r = allow ? rset_pickbot(allow) : RID_EAX;
+  emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
+  if (allow == RSET_EMPTY)  /* Restore temp. register. */
+    emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
+  else
+    ra_modified(as, r);
+  emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
+  if (ra_hasreg(pbase) && pbase != r)
+    emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
+  else
+    emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
+	      ptr2addr(&J2G(as->J)->jit_base));
+  emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
+  emit_getgl(as, r, jit_L);
+  if (allow == RSET_EMPTY)  /* Spill temp. register. */
+    emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1);
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
+    } else {
+      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+		 (LJ_DUALNUM && irt_isinteger(ir->t)));
+      if (!irref_isk(ref)) {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+	emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
+      } else if (!irt_ispri(ir->t)) {
+	emit_movmroi(as, RID_BASE, ofs, ir->i);
+      }
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+	if (s != 0)  /* Do not overwrite link to previous frame. */
+	  emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      } else {
+	if (!(LJ_64 && irt_islightud(ir->t)))
+	  emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+      }
+    }
+    checkmclim(as);
+  }
+  lua_assert(map + nent == flinks);
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
+  emit_rr(as, XO_TEST, RID_RET, RID_RET);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_loada(as, tmp, J2G(as->J));
+  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_sjcc(as, CC_B, l_end);
+  emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
+  emit_getgl(as, tmp, gc.total);
+  as->gcsteps = 0;
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+  MCode *p = as->mctop;
+  MCode *target = as->mcp;
+  if (as->realign) {  /* Realigned loops use short jumps. */
+    as->realign = NULL;  /* Stop another retry. */
+    lua_assert(((intptr_t)target & 15) == 0);
+    if (as->loopinv) {  /* Inverted loop branch? */
+      p -= 5;
+      p[0] = XI_JMP;
+      lua_assert(target - p >= -128);
+      p[-1] = (MCode)(target - p);  /* Patch sjcc. */
+      if (as->loopinv == 2)
+	p[-3] = (MCode)(target - p + 2);  /* Patch opt. short jp. */
+    } else {
+      lua_assert(target - p >= -128);
+      p[-1] = (MCode)(int8_t)(target - p);  /* Patch short jmp. */
+      p[-2] = XI_JMPs;
+    }
+  } else {
+    MCode *newloop;
+    p[-5] = XI_JMP;
+    if (as->loopinv) {  /* Inverted loop branch? */
+      /* asm_guardcc already inverted the jcc and patched the jmp. */
+      p -= 5;
+      newloop = target+4;
+      *(int32_t *)(p-4) = (int32_t)(target - p);  /* Patch jcc. */
+      if (as->loopinv == 2) {
+	*(int32_t *)(p-10) = (int32_t)(target - p + 6);  /* Patch opt. jp. */
+	newloop = target+8;
+      }
+    } else {  /* Otherwise just patch jmp. */
+      *(int32_t *)(p-4) = (int32_t)(target - p);
+      newloop = target+3;
+    }
+    /* Realign small loops and shorten the loop branch. */
+    if (newloop >= p - 128) {
+      as->realign = newloop;  /* Force a retry and remember alignment. */
+      as->curins = as->stopins;  /* Abort asm_trace now. */
+      as->T->nins = as->orignins;  /* Remove any added renames. */
+    }
+  }
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (r != RID_BASE)
+      emit_rr(as, XO_MOV, r, RID_BASE);
+  }
+}
+
+/* Coalesce or reload BASE register for a side trace. */
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (irp->r == r) {
+      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      rset_clear(allow, irp->r);
+      emit_rr(as, XO_MOV, r, irp->r);  /* Move from coalesced parent reg. */
+    } else {
+      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
+    }
+  }
+  return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
+  MCode *p = as->mctop;
+  MCode *target, *q;
+  int32_t spadj = as->T->spadjust;
+  if (spadj == 0) {
+    p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
+  } else {
+    MCode *p1;
+    /* Patch stack adjustment. */
+    if (checki8(spadj)) {
+      p -= 3;
+      p1 = p-6;
+      *p1 = (MCode)spadj;
+    } else {
+      p1 = p-9;
+      *(int32_t *)p1 = spadj;
+    }
+    if ((as->flags & JIT_F_LEA_AGU)) {
+#if LJ_64
+      p1[-4] = 0x48;
+#endif
+      p1[-3] = (MCode)XI_LEA;
+      p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
+      p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+    } else {
+#if LJ_64
+      p1[-3] = 0x48;
+#endif
+      p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
+      p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
+    }
+  }
+  /* Patch exit branch. */
+  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  *(int32_t *)(p-4) = jmprel(p, target);
+  p[-5] = XI_JMP;
+  /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
+  for (q = as->mctop-1; q >= p; q--)
+    *q = XI_NOP;
+  as->mctop = p;
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop;
+  /* Realign and leave room for backwards loop branch or exit branch. */
+  if (as->realign) {
+    int i = ((int)(intptr_t)as->realign) & 15;
+    /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
+    while (i-- > 0)
+      *--p = XI_NOP;
+    as->mctop = p;
+    p -= (as->loopinv ? 5 : 2);  /* Space for short/near jmp. */
+  } else {
+    p -= 5;  /* Space for exit branch (near jmp). */
+  }
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
+    as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6)  + (LJ_64 ? 1 : 0));
+    as->invmcp = NULL;
+  }
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_EQ: case IR_NE: case IR_ABC:
+    asm_comp(as, ir, asm_compmap[ir->o]);
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
+  case IR_BSWAP: asm_bitswap(as, ir); break;
+
+  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
+  case IR_BOR:  asm_intarith(as, ir, XOg_OR); break;
+  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
+
+  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
+  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
+  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
+  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
+  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_SUBSD);
+    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+      asm_intarith(as, ir, XOg_SUB);
+    break;
+  case IR_MUL:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_MULSD);
+    else
+      asm_intarith(as, ir, XOg_X_IMUL);
+    break;
+  case IR_DIV:
+#if LJ_64 && LJ_HASFFI
+    if (!irt_isnum(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					     IRCALL_lj_carith_divu64);
+    else
+#endif
+      asm_fparith(as, ir, XO_DIVSD);
+    break;
+  case IR_MOD:
+#if LJ_64 && LJ_HASFFI
+    if (!irt_isint(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					     IRCALL_lj_carith_modu64);
+    else
+#endif
+      asm_intmod(as, ir);
+    break;
+
+  case IR_NEG:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_XORPS);
+    else
+      asm_neg_not(as, ir, XOg_NEG);
+    break;
+  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
+
+  case IR_MIN:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_MINSD);
+    else
+      asm_min_max(as, ir, CC_G);
+    break;
+  case IR_MAX:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_MAXSD);
+    else
+      asm_min_max(as, ir, CC_L);
+    break;
+
+  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
+    asm_fpmath(as, ir);
+    break;
+  case IR_POW:
+#if LJ_64 && LJ_HASFFI
+    if (!irt_isnum(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					     IRCALL_lj_carith_powu64);
+    else
+#endif
+      asm_fppowi(as, ir);
+    break;
+
+  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
+  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
+  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
+  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  int nslots;
+  asm_collectargs(as, ir, ci, args);
+  nslots = asm_count_call_slots(as, ci, args);
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+#if LJ_64
+  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+#else
+  return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
+#endif
+}
+
+/* Target-specific setup. */
+static void asm_setup_target(ASMState *as)
+{
+  asm_exitstub_setup(as, as->T->nsnap);
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+  MSize len = T->szmcode;
+  MCode *px = exitstub_addr(J, exitno) - 6;
+  MCode *pe = p+len-6;
+  uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
+  if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
+    *(int32_t *)(p+len-4) = jmprel(p+len, target);
+  /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
+  for (; p < pe; p++)
+    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
+      p += LJ_64 ? 11 : 10;
+      break;
+    }
+  lua_assert(p < pe);
+  for (; p < pe; p++) {
+    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
+      *(int32_t *)(p+2) = jmprel(p+6, target);
+      p += 5;
+    }
+  }
+  lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
+  lj_mcode_patch(J, mcarea, 1);
+}
+

+ 14 - 0
luajit.mod/luajit/src/lj_bc.c

@@ -0,0 +1,14 @@
+/*
+** Bytecode instruction modes.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_bc_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_bc.h"
+
+/* Bytecode offsets and bytecode instruction modes. */
+#include "lj_bcdef.h"
+

+ 261 - 0
luajit.mod/luajit/src/lj_bc.h

@@ -0,0 +1,261 @@
+/*
+** Bytecode instruction format.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BC_H
+#define _LJ_BC_H
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* Bytecode instruction format, 32 bit wide, fields of 8 or 16 bit:
+**
+** +----+----+----+----+
+** | B  | C  | A  | OP | Format ABC
+** +----+----+----+----+
+** |    D    | A  | OP | Format AD
+** +--------------------
+** MSB               LSB
+**
+** In-memory instructions are always stored in host byte order.
+*/
+
+/* Operand ranges and related constants. */
+#define BCMAX_A		0xff
+#define BCMAX_B		0xff
+#define BCMAX_C		0xff
+#define BCMAX_D		0xffff
+#define BCBIAS_J	0x8000
+#define NO_REG		BCMAX_A
+#define NO_JMP		(~(BCPos)0)
+
+/* Macros to get instruction fields. */
+#define bc_op(i)	((BCOp)((i)&0xff))
+#define bc_a(i)		((BCReg)(((i)>>8)&0xff))
+#define bc_b(i)		((BCReg)((i)>>24))
+#define bc_c(i)		((BCReg)(((i)>>16)&0xff))
+#define bc_d(i)		((BCReg)((i)>>16))
+#define bc_j(i)		((ptrdiff_t)bc_d(i)-BCBIAS_J)
+
+/* Macros to set instruction fields. */
+#define setbc_byte(p, x, ofs) \
+  ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3-ofs)] = (uint8_t)(x)
+#define setbc_op(p, x)	setbc_byte(p, (x), 0)
+#define setbc_a(p, x)	setbc_byte(p, (x), 1)
+#define setbc_b(p, x)	setbc_byte(p, (x), 3)
+#define setbc_c(p, x)	setbc_byte(p, (x), 2)
+#define setbc_d(p, x) \
+  ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = (uint16_t)(x)
+#define setbc_j(p, x)	setbc_d(p, (BCPos)((int32_t)(x)+BCBIAS_J))
+
+/* Macros to compose instructions. */
+#define BCINS_ABC(o, a, b, c) \
+  (((BCIns)(o))|((BCIns)(a)<<8)|((BCIns)(b)<<24)|((BCIns)(c)<<16))
+#define BCINS_AD(o, a, d) \
+  (((BCIns)(o))|((BCIns)(a)<<8)|((BCIns)(d)<<16))
+#define BCINS_AJ(o, a, j)	BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J))
+
+/* Bytecode instruction definition. Order matters, see below.
+**
+** (name, filler, Amode, Bmode, Cmode or Dmode, metamethod)
+**
+** The opcode name suffixes specify the type for RB/RC or RD:
+** V = variable slot
+** S = string const
+** N = number const
+** P = primitive type (~itype)
+** B = unsigned byte literal
+** M = multiple args/results
+*/
+#define BCDEF(_) \
+  /* Comparison ops. ORDER OPR. */ \
+  _(ISLT,	var,	___,	var,	lt) \
+  _(ISGE,	var,	___,	var,	lt) \
+  _(ISLE,	var,	___,	var,	le) \
+  _(ISGT,	var,	___,	var,	le) \
+  \
+  _(ISEQV,	var,	___,	var,	eq) \
+  _(ISNEV,	var,	___,	var,	eq) \
+  _(ISEQS,	var,	___,	str,	eq) \
+  _(ISNES,	var,	___,	str,	eq) \
+  _(ISEQN,	var,	___,	num,	eq) \
+  _(ISNEN,	var,	___,	num,	eq) \
+  _(ISEQP,	var,	___,	pri,	eq) \
+  _(ISNEP,	var,	___,	pri,	eq) \
+  \
+  /* Unary test and copy ops. */ \
+  _(ISTC,	dst,	___,	var,	___) \
+  _(ISFC,	dst,	___,	var,	___) \
+  _(IST,	___,	___,	var,	___) \
+  _(ISF,	___,	___,	var,	___) \
+  \
+  /* Unary ops. */ \
+  _(MOV,	dst,	___,	var,	___) \
+  _(NOT,	dst,	___,	var,	___) \
+  _(UNM,	dst,	___,	var,	unm) \
+  _(LEN,	dst,	___,	var,	len) \
+  \
+  /* Binary ops. ORDER OPR. VV last, POW must be next. */ \
+  _(ADDVN,	dst,	var,	num,	add) \
+  _(SUBVN,	dst,	var,	num,	sub) \
+  _(MULVN,	dst,	var,	num,	mul) \
+  _(DIVVN,	dst,	var,	num,	div) \
+  _(MODVN,	dst,	var,	num,	mod) \
+  \
+  _(ADDNV,	dst,	var,	num,	add) \
+  _(SUBNV,	dst,	var,	num,	sub) \
+  _(MULNV,	dst,	var,	num,	mul) \
+  _(DIVNV,	dst,	var,	num,	div) \
+  _(MODNV,	dst,	var,	num,	mod) \
+  \
+  _(ADDVV,	dst,	var,	var,	add) \
+  _(SUBVV,	dst,	var,	var,	sub) \
+  _(MULVV,	dst,	var,	var,	mul) \
+  _(DIVVV,	dst,	var,	var,	div) \
+  _(MODVV,	dst,	var,	var,	mod) \
+  \
+  _(POW,	dst,	var,	var,	pow) \
+  _(CAT,	dst,	rbase,	rbase,	concat) \
+  \
+  /* Constant ops. */ \
+  _(KSTR,	dst,	___,	str,	___) \
+  _(KCDATA,	dst,	___,	cdata,	___) \
+  _(KSHORT,	dst,	___,	lits,	___) \
+  _(KNUM,	dst,	___,	num,	___) \
+  _(KPRI,	dst,	___,	pri,	___) \
+  _(KNIL,	base,	___,	base,	___) \
+  \
+  /* Upvalue and function ops. */ \
+  _(UGET,	dst,	___,	uv,	___) \
+  _(USETV,	uv,	___,	var,	___) \
+  _(USETS,	uv,	___,	str,	___) \
+  _(USETN,	uv,	___,	num,	___) \
+  _(USETP,	uv,	___,	pri,	___) \
+  _(UCLO,	rbase,	___,	jump,	___) \
+  _(FNEW,	dst,	___,	func,	gc) \
+  \
+  /* Table ops. */ \
+  _(TNEW,	dst,	___,	lit,	gc) \
+  _(TDUP,	dst,	___,	tab,	gc) \
+  _(GGET,	dst,	___,	str,	index) \
+  _(GSET,	var,	___,	str,	newindex) \
+  _(TGETV,	dst,	var,	var,	index) \
+  _(TGETS,	dst,	var,	str,	index) \
+  _(TGETB,	dst,	var,	lit,	index) \
+  _(TSETV,	var,	var,	var,	newindex) \
+  _(TSETS,	var,	var,	str,	newindex) \
+  _(TSETB,	var,	var,	lit,	newindex) \
+  _(TSETM,	base,	___,	num,	newindex) \
+  \
+  /* Calls and vararg handling. T = tail call. */ \
+  _(CALLM,	base,	lit,	lit,	call) \
+  _(CALL,	base,	lit,	lit,	call) \
+  _(CALLMT,	base,	___,	lit,	call) \
+  _(CALLT,	base,	___,	lit,	call) \
+  _(ITERC,	base,	lit,	lit,	call) \
+  _(ITERN,	base,	lit,	lit,	call) \
+  _(VARG,	base,	lit,	lit,	___) \
+  _(ISNEXT,	base,	___,	jump,	___) \
+  \
+  /* Returns. */ \
+  _(RETM,	base,	___,	lit,	___) \
+  _(RET,	rbase,	___,	lit,	___) \
+  _(RET0,	rbase,	___,	lit,	___) \
+  _(RET1,	rbase,	___,	lit,	___) \
+  \
+  /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \
+  _(FORI,	base,	___,	jump,	___) \
+  _(JFORI,	base,	___,	jump,	___) \
+  \
+  _(FORL,	base,	___,	jump,	___) \
+  _(IFORL,	base,	___,	jump,	___) \
+  _(JFORL,	base,	___,	lit,	___) \
+  \
+  _(ITERL,	base,	___,	jump,	___) \
+  _(IITERL,	base,	___,	jump,	___) \
+  _(JITERL,	base,	___,	lit,	___) \
+  \
+  _(LOOP,	rbase,	___,	jump,	___) \
+  _(ILOOP,	rbase,	___,	jump,	___) \
+  _(JLOOP,	rbase,	___,	lit,	___) \
+  \
+  _(JMP,	rbase,	___,	jump,	___) \
+  \
+  /* Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func. */ \
+  _(FUNCF,	rbase,	___,	___,	___) \
+  _(IFUNCF,	rbase,	___,	___,	___) \
+  _(JFUNCF,	rbase,	___,	lit,	___) \
+  _(FUNCV,	rbase,	___,	___,	___) \
+  _(IFUNCV,	rbase,	___,	___,	___) \
+  _(JFUNCV,	rbase,	___,	lit,	___) \
+  _(FUNCC,	rbase,	___,	___,	___) \
+  _(FUNCCW,	rbase,	___,	___,	___)
+
+/* Bytecode opcode numbers. */
+typedef enum {
+#define BCENUM(name, ma, mb, mc, mt)	BC_##name,
+BCDEF(BCENUM)
+#undef BCENUM
+  BC__MAX
+} BCOp;
+
+LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV);
+LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV);
+LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES);
+LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN);
+LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP);
+LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE);
+LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT);
+LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT);
+LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC);
+LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM);
+LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT);
+LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET);
+LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL);
+LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL);
+LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL);
+LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL);
+LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP);
+LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP);
+LJ_STATIC_ASSERT((int)BC_FUNCF + 1 == (int)BC_IFUNCF);
+LJ_STATIC_ASSERT((int)BC_FUNCF + 2 == (int)BC_JFUNCF);
+LJ_STATIC_ASSERT((int)BC_FUNCV + 1 == (int)BC_IFUNCV);
+LJ_STATIC_ASSERT((int)BC_FUNCV + 2 == (int)BC_JFUNCV);
+
+/* This solves a circular dependency problem, change as needed. */
+#define FF_next_N	4
+
+/* Stack slots used by FORI/FORL, relative to operand A. */
+enum {
+  FORL_IDX, FORL_STOP, FORL_STEP, FORL_EXT
+};
+
+/* Bytecode operand modes. ORDER BCMode */
+typedef enum {
+  BCMnone, BCMdst, BCMbase, BCMvar, BCMrbase, BCMuv,  /* Mode A must be <= 7 */
+  BCMlit, BCMlits, BCMpri, BCMnum, BCMstr, BCMtab, BCMfunc, BCMjump, BCMcdata,
+  BCM_max
+} BCMode;
+#define BCM___		BCMnone
+
+#define bcmode_a(op)	((BCMode)(lj_bc_mode[op] & 7))
+#define bcmode_b(op)	((BCMode)((lj_bc_mode[op]>>3) & 15))
+#define bcmode_c(op)	((BCMode)((lj_bc_mode[op]>>7) & 15))
+#define bcmode_d(op)	bcmode_c(op)
+#define bcmode_hasd(op)	((lj_bc_mode[op] & (15<<3)) == (BCMnone<<3))
+#define bcmode_mm(op)	((MMS)(lj_bc_mode[op]>>11))
+
+#define BCMODE(name, ma, mb, mc, mm) \
+  (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)),
+#define BCMODE_FF	0
+
+static LJ_AINLINE int bc_isret(BCOp op)
+{
+  return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1);
+}
+
+LJ_DATA const uint16_t lj_bc_mode[];
+LJ_DATA const uint16_t lj_bc_ofs[];
+
+#endif

+ 66 - 0
luajit.mod/luajit/src/lj_bcdump.h

@@ -0,0 +1,66 @@
+/*
+** Bytecode dump definitions.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BCDUMP_H
+#define _LJ_BCDUMP_H
+
+#include "lj_obj.h"
+#include "lj_lex.h"
+
+/* -- Bytecode dump format ------------------------------------------------ */
+
+/*
+** dump   = header proto+ 0U
+** header = ESC 'L' 'J' versionB flagsU [namelenU nameB*]
+** proto  = lengthU pdata
+** pdata  = phead bcinsW* uvdataH* kgc* knum* [debugB*]
+** phead  = flagsB numparamsB framesizeB numuvB numkgcU numknU numbcU
+**          [debuglenU [firstlineU numlineU]]
+** kgc    = kgctypeU { ktab | (loU hiU) | (rloU rhiU iloU ihiU) | strB* }
+** knum   = intU0 | (loU1 hiU)
+** ktab   = narrayU nhashU karray* khash*
+** karray = ktabk
+** khash  = ktabk ktabk
+** ktabk  = ktabtypeU { intU | (loU hiU) | strB* }
+**
+** B = 8 bit, H = 16 bit, W = 32 bit, U = ULEB128 of W, U0/U1 = ULEB128 of W+1
+*/
+
+/* Bytecode dump header. */
+#define BCDUMP_HEAD1		0x1b
+#define BCDUMP_HEAD2		0x4c
+#define BCDUMP_HEAD3		0x4a
+
+/* If you perform *any* kind of private modifications to the bytecode itself
+** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
+*/
+#define BCDUMP_VERSION		1
+
+/* Compatibility flags. */
+#define BCDUMP_F_BE		0x01
+#define BCDUMP_F_STRIP		0x02
+#define BCDUMP_F_FFI		0x04
+
+#define BCDUMP_F_KNOWN		(BCDUMP_F_FFI*2-1)
+
+/* Type codes for the GC constants of a prototype. Plus length for strings. */
+enum {
+  BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
+  BCDUMP_KGC_COMPLEX, BCDUMP_KGC_STR
+};
+
+/* Type codes for the keys/values of a constant table. */
+enum {
+  BCDUMP_KTAB_NIL, BCDUMP_KTAB_FALSE, BCDUMP_KTAB_TRUE,
+  BCDUMP_KTAB_INT, BCDUMP_KTAB_NUM, BCDUMP_KTAB_STR
+};
+
+/* -- Bytecode reader/writer ---------------------------------------------- */
+
+LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
+		       void *data, int strip);
+LJ_FUNC GCproto *lj_bcread(LexState *ls);
+
+#endif

Някои файлове не бяха показани, защото твърде много файлове са промени