Browse Source

Updated LuaJIT to latest 2.1 version

Martin Felis 10 years ago
parent
commit
28d2d92645
100 changed files with 6877 additions and 3792 deletions
  1. BIN
      jni/LuaJIT-2.0.1/android/armeabi-v7a/libluajit.a
  2. BIN
      jni/LuaJIT-2.0.1/android/armeabi/libluajit.a
  3. 0 74
      jni/LuaJIT-2.0.1/src/lib_bit.c
  4. 0 481
      jni/LuaJIT-2.0.1/src/lj_lex.c
  5. 0 339
      jni/LuaJIT-2.0.1/src/lj_str.c
  6. 0 50
      jni/LuaJIT-2.0.1/src/lj_str.h
  7. 0 0
      jni/LuaJIT-2.1/Android.mk
  8. 1 1
      jni/LuaJIT-2.1/COPYRIGHT
  9. 21 11
      jni/LuaJIT-2.1/Makefile
  10. 3 3
      jni/LuaJIT-2.1/README
  11. BIN
      jni/LuaJIT-2.1/android/armeabi-v7a/libluajit.a
  12. BIN
      jni/LuaJIT-2.1/android/armeabi/libluajit.a
  13. 2 2
      jni/LuaJIT-2.1/buildandroid.sh
  14. 1 1
      jni/LuaJIT-2.1/doc/bluequad-print.css
  15. 1 1
      jni/LuaJIT-2.1/doc/bluequad.css
  16. 177 232
      jni/LuaJIT-2.1/doc/changes.html
  17. 5 3
      jni/LuaJIT-2.1/doc/contact.html
  18. 4 2
      jni/LuaJIT-2.1/doc/ext_c_api.html
  19. 4 2
      jni/LuaJIT-2.1/doc/ext_ffi.html
  20. 8 4
      jni/LuaJIT-2.1/doc/ext_ffi_api.html
  21. 27 7
      jni/LuaJIT-2.1/doc/ext_ffi_semantics.html
  22. 5 3
      jni/LuaJIT-2.1/doc/ext_ffi_tutorial.html
  23. 5 3
      jni/LuaJIT-2.1/doc/ext_jit.html
  24. 365 0
      jni/LuaJIT-2.1/doc/ext_profiler.html
  25. 53 5
      jni/LuaJIT-2.1/doc/extensions.html
  26. 4 2
      jni/LuaJIT-2.1/doc/faq.html
  27. 0 0
      jni/LuaJIT-2.1/doc/img/contact.png
  28. 92 42
      jni/LuaJIT-2.1/doc/install.html
  29. 13 5
      jni/LuaJIT-2.1/doc/luajit.html
  30. 6 3
      jni/LuaJIT-2.1/doc/running.html
  31. 6 13
      jni/LuaJIT-2.1/doc/status.html
  32. 3 2
      jni/LuaJIT-2.1/dynasm/dasm_arm.h
  33. 9 6
      jni/LuaJIT-2.1/dynasm/dasm_arm.lua
  34. 518 0
      jni/LuaJIT-2.1/dynasm/dasm_arm64.h
  35. 1166 0
      jni/LuaJIT-2.1/dynasm/dasm_arm64.lua
  36. 3 2
      jni/LuaJIT-2.1/dynasm/dasm_mips.h
  37. 4 4
      jni/LuaJIT-2.1/dynasm/dasm_mips.lua
  38. 13 5
      jni/LuaJIT-2.1/dynasm/dasm_ppc.h
  39. 687 17
      jni/LuaJIT-2.1/dynasm/dasm_ppc.lua
  40. 3 3
      jni/LuaJIT-2.1/dynasm/dasm_proto.h
  41. 1 1
      jni/LuaJIT-2.1/dynasm/dasm_x64.lua
  42. 9 3
      jni/LuaJIT-2.1/dynasm/dasm_x86.h
  43. 366 80
      jni/LuaJIT-2.1/dynasm/dasm_x86.lua
  44. 10 11
      jni/LuaJIT-2.1/dynasm/dynasm.lua
  45. 1 1
      jni/LuaJIT-2.1/etc/luajit.1
  46. 6 5
      jni/LuaJIT-2.1/etc/luajit.pc
  47. 1 1
      jni/LuaJIT-2.1/src/.gitignore
  48. 90 59
      jni/LuaJIT-2.1/src/Makefile
  49. 111 93
      jni/LuaJIT-2.1/src/Makefile.dep
  50. 0 0
      jni/LuaJIT-2.1/src/host/.gitignore
  51. 0 0
      jni/LuaJIT-2.1/src/host/README
  52. 18 10
      jni/LuaJIT-2.1/src/host/buildvm.c
  53. 2 1
      jni/LuaJIT-2.1/src/host/buildvm.h
  54. 44 12
      jni/LuaJIT-2.1/src/host/buildvm_asm.c
  55. 1 1
      jni/LuaJIT-2.1/src/host/buildvm_fold.c
  56. 61 2
      jni/LuaJIT-2.1/src/host/buildvm_lib.c
  57. 45 0
      jni/LuaJIT-2.1/src/host/buildvm_libbc.h
  58. 1 1
      jni/LuaJIT-2.1/src/host/buildvm_peobj.c
  59. 197 0
      jni/LuaJIT-2.1/src/host/genlibbc.lua
  60. 2 1
      jni/LuaJIT-2.1/src/host/genminilua.lua
  61. 1 0
      jni/LuaJIT-2.1/src/host/minilua.c
  62. 0 0
      jni/LuaJIT-2.1/src/jit/.gitignore
  63. 10 11
      jni/LuaJIT-2.1/src/jit/bc.lua
  64. 16 14
      jni/LuaJIT-2.1/src/jit/bcsave.lua
  65. 10 10
      jni/LuaJIT-2.1/src/jit/dis_arm.lua
  66. 16 16
      jni/LuaJIT-2.1/src/jit/dis_mips.lua
  67. 7 10
      jni/LuaJIT-2.1/src/jit/dis_mipsel.lua
  68. 10 10
      jni/LuaJIT-2.1/src/jit/dis_ppc.lua
  69. 7 10
      jni/LuaJIT-2.1/src/jit/dis_x64.lua
  70. 176 89
      jni/LuaJIT-2.1/src/jit/dis_x86.lua
  71. 21 14
      jni/LuaJIT-2.1/src/jit/dump.lua
  72. 310 0
      jni/LuaJIT-2.1/src/jit/p.lua
  73. 10 7
      jni/LuaJIT-2.1/src/jit/v.lua
  74. 45 0
      jni/LuaJIT-2.1/src/jit/zone.lua
  75. 0 0
      jni/LuaJIT-2.1/src/lauxlib.h
  76. 2 2
      jni/LuaJIT-2.1/src/lib_aux.c
  77. 34 53
      jni/LuaJIT-2.1/src/lib_base.c
  78. 180 0
      jni/LuaJIT-2.1/src/lib_bit.c
  79. 2 2
      jni/LuaJIT-2.1/src/lib_debug.c
  80. 37 15
      jni/LuaJIT-2.1/src/lib_ffi.c
  81. 1 1
      jni/LuaJIT-2.1/src/lib_init.c
  82. 30 24
      jni/LuaJIT-2.1/src/lib_io.c
  83. 131 27
      jni/LuaJIT-2.1/src/lib_jit.c
  84. 4 7
      jni/LuaJIT-2.1/src/lib_math.c
  85. 32 20
      jni/LuaJIT-2.1/src/lib_os.c
  86. 12 7
      jni/LuaJIT-2.1/src/lib_package.c
  87. 131 319
      jni/LuaJIT-2.1/src/lib_string.c
  88. 90 83
      jni/LuaJIT-2.1/src/lib_table.c
  89. 15 0
      jni/LuaJIT-2.1/src/lj.supp
  90. 30 13
      jni/LuaJIT-2.1/src/lj_alloc.c
  91. 0 0
      jni/LuaJIT-2.1/src/lj_alloc.h
  92. 76 63
      jni/LuaJIT-2.1/src/lj_api.c
  93. 146 49
      jni/LuaJIT-2.1/src/lj_arch.h
  94. 471 79
      jni/LuaJIT-2.1/src/lj_asm.c
  95. 1 1
      jni/LuaJIT-2.1/src/lj_asm.h
  96. 173 308
      jni/LuaJIT-2.1/src/lj_asm_arm.h
  97. 118 259
      jni/LuaJIT-2.1/src/lj_asm_mips.h
  98. 119 269
      jni/LuaJIT-2.1/src/lj_asm_ppc.h
  99. 227 389
      jni/LuaJIT-2.1/src/lj_asm_x86.h
  100. 1 1
      jni/LuaJIT-2.1/src/lj_bc.c

BIN
jni/LuaJIT-2.0.1/android/armeabi-v7a/libluajit.a


BIN
jni/LuaJIT-2.0.1/android/armeabi/libluajit.a


+ 0 - 74
jni/LuaJIT-2.0.1/src/lib_bit.c

@@ -1,74 +0,0 @@
-/*
-** Bit manipulation library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-*/
-
-#define lib_bit_c
-#define LUA_LIB
-
-#include "lua.h"
-#include "lauxlib.h"
-#include "lualib.h"
-
-#include "lj_obj.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#include "lj_lib.h"
-
-/* ------------------------------------------------------------------------ */
-
-#define LJLIB_MODULE_bit
-
-LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
-{
-  lj_lib_checknumber(L, 1);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
-
-LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
-{
-  lj_lib_checknumber(L, 1);
-  lj_lib_checkbit(L, 2);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
-LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
-LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
-LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
-
-LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
-{
-  int i = 0;
-  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
-LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
-
-/* ------------------------------------------------------------------------ */
-
-LJLIB_CF(bit_tohex)
-{
-  uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
-  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
-  const char *hexdigits = "0123456789abcdef";
-  char buf[8];
-  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
-  if (n > 8) n = 8;
-  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
-  lua_pushlstring(L, buf, (size_t)n);
-  return 1;
-}
-
-/* ------------------------------------------------------------------------ */
-
-#include "lj_libdef.h"
-
-LUALIB_API int luaopen_bit(lua_State *L)
-{
-  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
-  return 1;
-}
-

+ 0 - 481
jni/LuaJIT-2.0.1/src/lj_lex.c

@@ -1,481 +0,0 @@
-/*
-** Lexical analyzer.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-**
-** Major portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
-*/
-
-#define lj_lex_c
-#define LUA_CORE
-
-#include "lj_obj.h"
-#include "lj_gc.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#if LJ_HASFFI
-#include "lj_tab.h"
-#include "lj_ctype.h"
-#include "lj_cdata.h"
-#include "lualib.h"
-#endif
-#include "lj_state.h"
-#include "lj_lex.h"
-#include "lj_parse.h"
-#include "lj_char.h"
-#include "lj_strscan.h"
-
-/* Lua lexer token names. */
-static const char *const tokennames[] = {
-#define TKSTR1(name)		#name,
-#define TKSTR2(name, sym)	#sym,
-TKDEF(TKSTR1, TKSTR2)
-#undef TKSTR1
-#undef TKSTR2
-  NULL
-};
-
-/* -- Buffer handling ----------------------------------------------------- */
-
-#define char2int(c)		((int)(uint8_t)(c))
-#define next(ls) \
-  (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
-#define save_and_next(ls)	(save(ls, ls->current), next(ls))
-#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
-#define END_OF_STREAM		(-1)
-
-static int fillbuf(LexState *ls)
-{
-  size_t sz;
-  const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
-  if (buf == NULL || sz == 0) return END_OF_STREAM;
-  ls->n = (MSize)sz - 1;
-  ls->p = buf;
-  return char2int(*(ls->p++));
-}
-
-static LJ_NOINLINE void save_grow(LexState *ls, int c)
-{
-  MSize newsize;
-  if (ls->sb.sz >= LJ_MAX_STR/2)
-    lj_lex_error(ls, 0, LJ_ERR_XELEM);
-  newsize = ls->sb.sz * 2;
-  lj_str_resizebuf(ls->L, &ls->sb, newsize);
-  ls->sb.buf[ls->sb.n++] = (char)c;
-}
-
-static LJ_AINLINE void save(LexState *ls, int c)
-{
-  if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz))
-    save_grow(ls, c);
-  else
-    ls->sb.buf[ls->sb.n++] = (char)c;
-}
-
-static void inclinenumber(LexState *ls)
-{
-  int old = ls->current;
-  lua_assert(currIsNewline(ls));
-  next(ls);  /* skip `\n' or `\r' */
-  if (currIsNewline(ls) && ls->current != old)
-    next(ls);  /* skip `\n\r' or `\r\n' */
-  if (++ls->linenumber >= LJ_MAX_LINE)
-    lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
-}
-
-/* -- Scanner for terminals ----------------------------------------------- */
-
-/* Parse a number literal. */
-static void lex_number(LexState *ls, TValue *tv)
-{
-  StrScanFmt fmt;
-  int c, xp = 'e';
-  lua_assert(lj_char_isdigit(ls->current));
-  if ((c = ls->current) == '0') {
-    save_and_next(ls);
-    if ((ls->current | 0x20) == 'x') xp = 'p';
-  }
-  while (lj_char_isident(ls->current) || ls->current == '.' ||
-	 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) {
-    c = ls->current;
-    save_and_next(ls);
-  }
-  save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
-	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
-	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
-  if (LJ_DUALNUM && fmt == STRSCAN_INT) {
-    setitype(tv, LJ_TISNUM);
-  } else if (fmt == STRSCAN_NUM) {
-    /* Already in correct format. */
-#if LJ_HASFFI
-  } else if (fmt != STRSCAN_ERROR) {
-    lua_State *L = ls->L;
-    GCcdata *cd;
-    lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
-    if (fmt == STRSCAN_IMAG) {
-      cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
-      ((double *)cdataptr(cd))[0] = 0;
-      ((double *)cdataptr(cd))[1] = numV(tv);
-    } else {
-      cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
-      *(uint64_t *)cdataptr(cd) = tv->u64;
-    }
-    lj_parse_keepcdata(ls, tv, cd);
-#endif
-  } else {
-    lua_assert(fmt == STRSCAN_ERROR);
-    lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
-  }
-}
-
-static int skip_sep(LexState *ls)
-{
-  int count = 0;
-  int s = ls->current;
-  lua_assert(s == '[' || s == ']');
-  save_and_next(ls);
-  while (ls->current == '=') {
-    save_and_next(ls);
-    count++;
-  }
-  return (ls->current == s) ? count : (-count) - 1;
-}
-
-static void read_long_string(LexState *ls, TValue *tv, int sep)
-{
-  save_and_next(ls);  /* skip 2nd `[' */
-  if (currIsNewline(ls))  /* string starts with a newline? */
-    inclinenumber(ls);  /* skip it */
-  for (;;) {
-    switch (ls->current) {
-    case END_OF_STREAM:
-      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
-      break;
-    case ']':
-      if (skip_sep(ls) == sep) {
-	save_and_next(ls);  /* skip 2nd `]' */
-	goto endloop;
-      }
-      break;
-    case '\n':
-    case '\r':
-      save(ls, '\n');
-      inclinenumber(ls);
-      if (!tv) lj_str_resetbuf(&ls->sb);  /* avoid wasting space */
-      break;
-    default:
-      if (tv) save_and_next(ls);
-      else next(ls);
-      break;
-    }
-  } endloop:
-  if (tv) {
-    GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
-				      ls->sb.n - 2*(2 + (MSize)sep));
-    setstrV(ls->L, tv, str);
-  }
-}
-
-static void read_string(LexState *ls, int delim, TValue *tv)
-{
-  save_and_next(ls);
-  while (ls->current != delim) {
-    switch (ls->current) {
-    case END_OF_STREAM:
-      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
-      continue;
-    case '\n':
-    case '\r':
-      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
-      continue;
-    case '\\': {
-      int c = next(ls);  /* Skip the '\\'. */
-      switch (c) {
-      case 'a': c = '\a'; break;
-      case 'b': c = '\b'; break;
-      case 'f': c = '\f'; break;
-      case 'n': c = '\n'; break;
-      case 'r': c = '\r'; break;
-      case 't': c = '\t'; break;
-      case 'v': c = '\v'; break;
-      case 'x':  /* Hexadecimal escape '\xXX'. */
-	c = (next(ls) & 15u) << 4;
-	if (!lj_char_isdigit(ls->current)) {
-	  if (!lj_char_isxdigit(ls->current)) goto err_xesc;
-	  c += 9 << 4;
-	}
-	c += (next(ls) & 15u);
-	if (!lj_char_isdigit(ls->current)) {
-	  if (!lj_char_isxdigit(ls->current)) goto err_xesc;
-	  c += 9;
-	}
-	break;
-      case 'z':  /* Skip whitespace. */
-	next(ls);
-	while (lj_char_isspace(ls->current))
-	  if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
-	continue;
-      case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
-      case '\\': case '\"': case '\'': break;
-      case END_OF_STREAM: continue;
-      default:
-	if (!lj_char_isdigit(c))
-	  goto err_xesc;
-	c -= '0';  /* Decimal escape '\ddd'. */
-	if (lj_char_isdigit(next(ls))) {
-	  c = c*10 + (ls->current - '0');
-	  if (lj_char_isdigit(next(ls))) {
-	    c = c*10 + (ls->current - '0');
-	    if (c > 255) {
-	    err_xesc:
-	      lj_lex_error(ls, TK_string, LJ_ERR_XESC);
-	    }
-	    next(ls);
-	  }
-	}
-	save(ls, c);
-	continue;
-      }
-      save(ls, c);
-      next(ls);
-      continue;
-      }
-    default:
-      save_and_next(ls);
-      break;
-    }
-  }
-  save_and_next(ls);  /* skip delimiter */
-  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
-}
-
-/* -- Main lexical scanner ------------------------------------------------ */
-
-static int llex(LexState *ls, TValue *tv)
-{
-  lj_str_resetbuf(&ls->sb);
-  for (;;) {
-    if (lj_char_isident(ls->current)) {
-      GCstr *s;
-      if (lj_char_isdigit(ls->current)) {  /* Numeric literal. */
-	lex_number(ls, tv);
-	return TK_number;
-      }
-      /* Identifier or reserved word. */
-      do {
-	save_and_next(ls);
-      } while (lj_char_isident(ls->current));
-      s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
-      setstrV(ls->L, tv, s);
-      if (s->reserved > 0)  /* Reserved word? */
-	return TK_OFS + s->reserved;
-      return TK_name;
-    }
-    switch (ls->current) {
-    case '\n':
-    case '\r':
-      inclinenumber(ls);
-      continue;
-    case ' ':
-    case '\t':
-    case '\v':
-    case '\f':
-      next(ls);
-      continue;
-    case '-':
-      next(ls);
-      if (ls->current != '-') return '-';
-      /* else is a comment */
-      next(ls);
-      if (ls->current == '[') {
-	int sep = skip_sep(ls);
-	lj_str_resetbuf(&ls->sb);  /* `skip_sep' may dirty the buffer */
-	if (sep >= 0) {
-	  read_long_string(ls, NULL, sep);  /* long comment */
-	  lj_str_resetbuf(&ls->sb);
-	  continue;
-	}
-      }
-      /* else short comment */
-      while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
-	next(ls);
-      continue;
-    case '[': {
-      int sep = skip_sep(ls);
-      if (sep >= 0) {
-	read_long_string(ls, tv, sep);
-	return TK_string;
-      } else if (sep == -1) {
-	return '[';
-      } else {
-	lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
-	continue;
-      }
-      }
-    case '=':
-      next(ls);
-      if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
-    case '<':
-      next(ls);
-      if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
-    case '>':
-      next(ls);
-      if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
-    case '~':
-      next(ls);
-      if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
-    case ':':
-      next(ls);
-      if (ls->current != ':') return ':'; else { next(ls); return TK_label; }
-    case '"':
-    case '\'':
-      read_string(ls, ls->current, tv);
-      return TK_string;
-    case '.':
-      save_and_next(ls);
-      if (ls->current == '.') {
-	next(ls);
-	if (ls->current == '.') {
-	  next(ls);
-	  return TK_dots;   /* ... */
-	}
-	return TK_concat;   /* .. */
-      } else if (!lj_char_isdigit(ls->current)) {
-	return '.';
-      } else {
-	lex_number(ls, tv);
-	return TK_number;
-      }
-    case END_OF_STREAM:
-      return TK_eof;
-    default: {
-      int c = ls->current;
-      next(ls);
-      return c;  /* Single-char tokens (+ - / ...). */
-    }
-    }
-  }
-}
-
-/* -- Lexer API ----------------------------------------------------------- */
-
-/* Setup lexer state. */
-int lj_lex_setup(lua_State *L, LexState *ls)
-{
-  int header = 0;
-  ls->L = L;
-  ls->fs = NULL;
-  ls->n = 0;
-  ls->p = NULL;
-  ls->vstack = NULL;
-  ls->sizevstack = 0;
-  ls->vtop = 0;
-  ls->bcstack = NULL;
-  ls->sizebcstack = 0;
-  ls->lookahead = TK_eof;  /* No look-ahead token. */
-  ls->linenumber = 1;
-  ls->lastline = 1;
-  lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
-  next(ls);  /* Read-ahead first char. */
-  if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
-      char2int(ls->p[1]) == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
-    ls->n -= 2;
-    ls->p += 2;
-    next(ls);
-    header = 1;
-  }
-  if (ls->current == '#') {  /* Skip POSIX #! header line. */
-    do {
-      next(ls);
-      if (ls->current == END_OF_STREAM) return 0;
-    } while (!currIsNewline(ls));
-    inclinenumber(ls);
-    header = 1;
-  }
-  if (ls->current == LUA_SIGNATURE[0]) {  /* Bytecode dump. */
-    if (header) {
-      /*
-      ** Loading bytecode with an extra header is disabled for security
-      ** reasons. This may circumvent the usual check for bytecode vs.
-      ** Lua code by looking at the first char. Since this is a potential
-      ** security violation no attempt is made to echo the chunkname either.
-      */
-      setstrV(L, L->top++, lj_err_str(L, LJ_ERR_BCBAD));
-      lj_err_throw(L, LUA_ERRSYNTAX);
-    }
-    return 1;
-  }
-  return 0;
-}
-
-/* Cleanup lexer state. */
-void lj_lex_cleanup(lua_State *L, LexState *ls)
-{
-  global_State *g = G(L);
-  lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
-  lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
-  lj_str_freebuf(g, &ls->sb);
-}
-
-void lj_lex_next(LexState *ls)
-{
-  ls->lastline = ls->linenumber;
-  if (LJ_LIKELY(ls->lookahead == TK_eof)) {  /* No lookahead token? */
-    ls->token = llex(ls, &ls->tokenval);  /* Get next token. */
-  } else {  /* Otherwise return lookahead token. */
-    ls->token = ls->lookahead;
-    ls->lookahead = TK_eof;
-    ls->tokenval = ls->lookaheadval;
-  }
-}
-
-LexToken lj_lex_lookahead(LexState *ls)
-{
-  lua_assert(ls->lookahead == TK_eof);
-  ls->lookahead = llex(ls, &ls->lookaheadval);
-  return ls->lookahead;
-}
-
-const char *lj_lex_token2str(LexState *ls, LexToken token)
-{
-  if (token > TK_OFS)
-    return tokennames[token-TK_OFS-1];
-  else if (!lj_char_iscntrl(token))
-    return lj_str_pushf(ls->L, "%c", token);
-  else
-    return lj_str_pushf(ls->L, "char(%d)", token);
-}
-
-void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
-{
-  const char *tok;
-  va_list argp;
-  if (token == 0) {
-    tok = NULL;
-  } else if (token == TK_name || token == TK_string || token == TK_number) {
-    save(ls, '\0');
-    tok = ls->sb.buf;
-  } else {
-    tok = lj_lex_token2str(ls, token);
-  }
-  va_start(argp, em);
-  lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp);
-  va_end(argp);
-}
-
-void lj_lex_init(lua_State *L)
-{
-  uint32_t i;
-  for (i = 0; i < TK_RESERVED; i++) {
-    GCstr *s = lj_str_newz(L, tokennames[i]);
-    fixstring(s);  /* Reserved words are never collected. */
-    s->reserved = (uint8_t)(i+1);
-  }
-}
-

+ 0 - 339
jni/LuaJIT-2.0.1/src/lj_str.c

@@ -1,339 +0,0 @@
-/*
-** String handling.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-**
-** Portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
-*/
-
-#include <stdio.h>
-
-#define lj_str_c
-#define LUA_CORE
-
-#include "lj_obj.h"
-#include "lj_gc.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#include "lj_state.h"
-#include "lj_char.h"
-
-/* -- String interning ---------------------------------------------------- */
-
-/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
-int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
-{
-  MSize i, n = a->len > b->len ? b->len : a->len;
-  for (i = 0; i < n; i += 4) {
-    /* Note: innocuous access up to end of string + 3. */
-    uint32_t va = *(const uint32_t *)(strdata(a)+i);
-    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
-    if (va != vb) {
-#if LJ_LE
-      va = lj_bswap(va); vb = lj_bswap(vb);
-#endif
-      i -= n;
-      if ((int32_t)i >= -3) {
-	va >>= 32+(i<<3); vb >>= 32+(i<<3);
-	if (va == vb) break;
-      }
-      return va < vb ? -1 : 1;
-    }
-  }
-  return (int32_t)(a->len - b->len);
-}
-
-/* Fast string data comparison. Caveat: unaligned access to 1st string! */
-static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
-{
-  MSize i = 0;
-  lua_assert(len > 0);
-  lua_assert((((uintptr_t)a + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
-  do {  /* Note: innocuous access up to end of string + 3. */
-    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
-    if (v) {
-      i -= len;
-#if LJ_LE
-      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
-#else
-      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
-#endif
-    }
-    i += 4;
-  } while (i < len);
-  return 0;
-}
-
-/* Resize the string hash table (grow and shrink). */
-void lj_str_resize(lua_State *L, MSize newmask)
-{
-  global_State *g = G(L);
-  GCRef *newhash;
-  MSize i;
-  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
-    return;  /* No resizing during GC traversal or if already too big. */
-  newhash = lj_mem_newvec(L, newmask+1, GCRef);
-  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
-  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
-    GCobj *p = gcref(g->strhash[i]);
-    while (p) {  /* Follow each hash chain and reinsert all strings. */
-      MSize h = gco2str(p)->hash & newmask;
-      GCobj *next = gcnext(p);
-      /* NOBARRIER: The string table is a GC root. */
-      setgcrefr(p->gch.nextgc, newhash[h]);
-      setgcref(newhash[h], p);
-      p = next;
-    }
-  }
-  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
-  g->strmask = newmask;
-  g->strhash = newhash;
-}
-
-/* Intern a string and return string object. */
-GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
-{
-  global_State *g;
-  GCstr *s;
-  GCobj *o;
-  MSize len = (MSize)lenx;
-  MSize a, b, h = len;
-  if (lenx >= LJ_MAX_STR)
-    lj_err_msg(L, LJ_ERR_STROV);
-  g = G(L);
-  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
-  if (len >= 4) {  /* Caveat: unaligned access! */
-    a = lj_getu32(str);
-    h ^= lj_getu32(str+len-4);
-    b = lj_getu32(str+(len>>1)-2);
-    h ^= b; h -= lj_rol(b, 14);
-    b += lj_getu32(str+(len>>2)-1);
-  } else if (len > 0) {
-    a = *(const uint8_t *)str;
-    h ^= *(const uint8_t *)(str+len-1);
-    b = *(const uint8_t *)(str+(len>>1));
-    h ^= b; h -= lj_rol(b, 14);
-  } else {
-    return &g->strempty;
-  }
-  a ^= h; a -= lj_rol(h, 11);
-  b ^= a; b -= lj_rol(a, 25);
-  h ^= b; h -= lj_rol(b, 16);
-  /* Check if the string has already been interned. */
-  o = gcref(g->strhash[h & g->strmask]);
-  if (LJ_LIKELY((((uintptr_t)str + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
-    while (o != NULL) {
-      GCstr *sx = gco2str(o);
-      if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
-	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
-	if (isdead(g, o)) flipwhite(o);
-	return sx;  /* Return existing string. */
-      }
-      o = gcnext(o);
-    }
-  } else {  /* Slow path: end of string is too close to a page boundary. */
-    while (o != NULL) {
-      GCstr *sx = gco2str(o);
-      if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
-	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
-	if (isdead(g, o)) flipwhite(o);
-	return sx;  /* Return existing string. */
-      }
-      o = gcnext(o);
-    }
-  }
-  /* Nope, create a new string. */
-  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
-  newwhite(g, s);
-  s->gct = ~LJ_TSTR;
-  s->len = len;
-  s->hash = h;
-  s->reserved = 0;
-  memcpy(strdatawr(s), str, len);
-  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
-  /* Add it to string hash table. */
-  h &= g->strmask;
-  s->nextgc = g->strhash[h];
-  /* NOBARRIER: The string table is a GC root. */
-  setgcref(g->strhash[h], obj2gco(s));
-  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
-    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
-  return s;  /* Return newly interned string. */
-}
-
-void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
-{
-  g->strnum--;
-  lj_mem_free(g, s, sizestring(s));
-}
-
-/* -- Type conversions ---------------------------------------------------- */
-
-/* Print number to buffer. Canonicalizes non-finite values. */
-size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
-{
-  if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) {  /* Finite? */
-    lua_Number n = o->n;
-#if __BIONIC__
-    if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
-#endif
-    return (size_t)lua_number2str(s, n);
-  } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
-    s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
-  } else if ((o->u32.hi & 0x80000000) == 0) {
-    s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
-  } else {
-    s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
-  }
-}
-
-/* Print integer to buffer. Returns pointer to start. */
-char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
-{
-  uint32_t u = (uint32_t)(k < 0 ? -k : k);
-  p += 1+10;
-  do { *--p = (char)('0' + u % 10); } while (u /= 10);
-  if (k < 0) *--p = '-';
-  return p;
-}
-
-/* Convert number to string. */
-GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
-{
-  char buf[LJ_STR_NUMBUF];
-  size_t len = lj_str_bufnum(buf, (TValue *)np);
-  return lj_str_new(L, buf, len);
-}
-
-/* Convert integer to string. */
-GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
-{
-  char s[1+10];
-  char *p = lj_str_bufint(s, k);
-  return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
-}
-
-GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
-{
-  return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
-}
-
-/* -- String formatting --------------------------------------------------- */
-
-static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
-{
-  char *p;
-  MSize i;
-  if (sb->n + len > sb->sz) {
-    MSize sz = sb->sz * 2;
-    while (sb->n + len > sz) sz = sz * 2;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  p = sb->buf + sb->n;
-  sb->n += len;
-  for (i = 0; i < len; i++) p[i] = str[i];
-}
-
-static void addchar(lua_State *L, SBuf *sb, int c)
-{
-  if (sb->n + 1 > sb->sz) {
-    MSize sz = sb->sz * 2;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  sb->buf[sb->n++] = (char)c;
-}
-
-/* Push formatted message as a string object to Lua stack. va_list variant. */
-const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
-{
-  SBuf *sb = &G(L)->tmpbuf;
-  lj_str_needbuf(L, sb, (MSize)strlen(fmt));
-  lj_str_resetbuf(sb);
-  for (;;) {
-    const char *e = strchr(fmt, '%');
-    if (e == NULL) break;
-    addstr(L, sb, fmt, (MSize)(e-fmt));
-    /* This function only handles %s, %c, %d, %f and %p formats. */
-    switch (e[1]) {
-    case 's': {
-      const char *s = va_arg(argp, char *);
-      if (s == NULL) s = "(null)";
-      addstr(L, sb, s, (MSize)strlen(s));
-      break;
-      }
-    case 'c':
-      addchar(L, sb, va_arg(argp, int));
-      break;
-    case 'd': {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
-      addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
-      break;
-      }
-    case 'f': {
-      char buf[LJ_STR_NUMBUF];
-      TValue tv;
-      MSize len;
-      tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
-      len = (MSize)lj_str_bufnum(buf, &tv);
-      addstr(L, sb, buf, len);
-      break;
-      }
-    case 'p': {
-#define FMTP_CHARS	(2*sizeof(ptrdiff_t))
-      char buf[2+FMTP_CHARS];
-      ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
-      ptrdiff_t i, lasti = 2+FMTP_CHARS;
-      if (p == 0) {
-	addstr(L, sb, "NULL", 4);
-	break;
-      }
-#if LJ_64
-      /* Shorten output for 64 bit pointers. */
-      lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
-#endif
-      buf[0] = '0';
-      buf[1] = 'x';
-      for (i = lasti-1; i >= 2; i--, p >>= 4)
-	buf[i] = "0123456789abcdef"[(p & 15)];
-      addstr(L, sb, buf, (MSize)lasti);
-      break;
-      }
-    case '%':
-      addchar(L, sb, '%');
-      break;
-    default:
-      addchar(L, sb, '%');
-      addchar(L, sb, e[1]);
-      break;
-    }
-    fmt = e+2;
-  }
-  addstr(L, sb, fmt, (MSize)strlen(fmt));
-  setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
-  incr_top(L);
-  return strVdata(L->top - 1);
-}
-
-/* Push formatted message as a string object to Lua stack. Vararg variant. */
-const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-{
-  const char *msg;
-  va_list argp;
-  va_start(argp, fmt);
-  msg = lj_str_pushvf(L, fmt, argp);
-  va_end(argp);
-  return msg;
-}
-
-/* -- Buffer handling ----------------------------------------------------- */
-
-char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
-{
-  if (sz > sb->sz) {
-    if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  return sb->buf;
-}
-

+ 0 - 50
jni/LuaJIT-2.0.1/src/lj_str.h

@@ -1,50 +0,0 @@
-/*
-** String handling.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-*/
-
-#ifndef _LJ_STR_H
-#define _LJ_STR_H
-
-#include <stdarg.h>
-
-#include "lj_obj.h"
-
-/* String interning. */
-LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
-LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
-LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
-LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
-
-#define lj_str_newz(L, s)	(lj_str_new(L, s, strlen(s)))
-#define lj_str_newlit(L, s)	(lj_str_new(L, "" s, sizeof(s)-1))
-
-/* Type conversions. */
-LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
-LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
-LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
-
-#define LJ_STR_INTBUF		(1+10)
-#define LJ_STR_NUMBUF		LUAI_MAXNUMBER2STR
-
-/* String formatting. */
-LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
-LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-#if defined(__GNUC__)
-  __attribute__ ((format (printf, 2, 3)))
-#endif
-  ;
-
-/* Resizable string buffers. Struct definition in lj_obj.h. */
-LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
-
-#define lj_str_initbuf(sb)	((sb)->buf = NULL, (sb)->sz = 0)
-#define lj_str_resetbuf(sb)	((sb)->n = 0)
-#define lj_str_resizebuf(L, sb, size) \
-  ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
-   (sb)->sz = (size))
-#define lj_str_freebuf(g, sb)	lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
-
-#endif

+ 0 - 0
jni/LuaJIT-2.0.1/Android.mk → jni/LuaJIT-2.1/Android.mk


+ 1 - 1
jni/LuaJIT-2.0.1/COPYRIGHT → jni/LuaJIT-2.1/COPYRIGHT

@@ -1,7 +1,7 @@
 ===============================================================================
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 
-Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

+ 21 - 11
jni/LuaJIT-2.0.1/Makefile → jni/LuaJIT-2.1/Makefile

@@ -10,13 +10,14 @@
 # For MSVC, please follow the instructions given in src/msvcbuild.bat.
 # For MinGW and Cygwin, cd to src and run make with the Makefile there.
 #
-# Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 
 MAJVER=  2
-MINVER=  0
-RELVER=  1
-VERSION= $(MAJVER).$(MINVER).$(RELVER)
+MINVER=  1
+RELVER=  0
+PREREL=  -beta1
+VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
 ABIVER=  5.1
 
 ##############################################################################
@@ -25,11 +26,12 @@ ABIVER=  5.1
 # the paths in src/luaconf.h, too. Note: PREFIX must be an absolute path!
 #
 export PREFIX= /usr/local
+export MULTILIB= lib
 ##############################################################################
 
 DPREFIX= $(DESTDIR)$(PREFIX)
 INSTALL_BIN=   $(DPREFIX)/bin
-INSTALL_LIB=   $(DPREFIX)/lib
+INSTALL_LIB=   $(DPREFIX)/$(MULTILIB)
 INSTALL_SHARE= $(DPREFIX)/share
 INSTALL_INC=   $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER)
 
@@ -73,7 +75,8 @@ INSTALL_X= install -m 0755
 INSTALL_F= install -m 0644
 UNINSTALL= $(RM)
 LDCONFIG= ldconfig -n
-SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|"
+SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
+            -e "s|^multilib=.*|multilib=$(MULTILIB)|"
 
 FILE_T= luajit
 FILE_A= libluajit.a
@@ -81,8 +84,9 @@ FILE_SO= libluajit.so
 FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
-FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \
-	      dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua
+FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+	      dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \
+	      dis_mips.lua dis_mipsel.lua vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
   ifeq (Darwin,$(shell uname -s))
@@ -107,7 +111,7 @@ install: $(INSTALL_DEP)
 	$(MKDIR) $(INSTALL_DIRS)
 	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
 	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
-	$(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+	$(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
 	cd src && test -f $(FILE_SO) && \
 	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
 	  $(LDCONFIG) $(INSTALL_LIB) && \
@@ -119,12 +123,18 @@ install: $(INSTALL_DEP)
 	  $(RM) $(FILE_PC).tmp
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
-	$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+	@echo ""
+	@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
+	@echo "You can do this now by running this command (with sudo):"
+	@echo ""
+	@echo "  $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
+	@echo ""
+
 
 uninstall:
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
-	$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
 	for file in $(FILES_JITLIB); do \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  done

+ 3 - 3
jni/LuaJIT-2.0.1/README → jni/LuaJIT-2.1/README

@@ -1,11 +1,11 @@
-README for LuaJIT 2.0.1
------------------------
+README for LuaJIT 2.1.0-beta1
+-----------------------------
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
 Project Homepage: http://luajit.org/
 
-LuaJIT is Copyright (C) 2005-2013 Mike Pall.
+LuaJIT is Copyright (C) 2005-2015 Mike Pall.
 LuaJIT is free software, released under the MIT license.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 

BIN
jni/LuaJIT-2.1/android/armeabi-v7a/libluajit.a


BIN
jni/LuaJIT-2.1/android/armeabi/libluajit.a


+ 2 - 2
jni/LuaJIT-2.0.1/buildandroid.sh → jni/LuaJIT-2.1/buildandroid.sh

@@ -26,8 +26,8 @@ if [[ x$NDK = "x" ]]; then
     exit 1
 fi
 
-NDKABI=8
-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
+NDKABI=9
+NDKVER=$NDK/toolchains/arm-linux-androideabi-4.8
 NDKP=$NDKVER/prebuilt/${host_os}-${host_arch}/bin/arm-linux-androideabi-
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 CFLAGS=""

+ 1 - 1
jni/LuaJIT-2.0.1/doc/bluequad-print.css → jni/LuaJIT-2.1/doc/bluequad-print.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2013 Mike Pall.
+/* Copyright (C) 2004-2015 Mike Pall.
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 1 - 1
jni/LuaJIT-2.0.1/doc/bluequad.css → jni/LuaJIT-2.1/doc/bluequad.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2013 Mike Pall.
+/* Copyright (C) 2004-2015 Mike Pall.
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 177 - 232
jni/LuaJIT-2.0.1/doc/changes.html → jni/LuaJIT-2.1/doc/changes.html

@@ -4,7 +4,7 @@
 <title>LuaJIT Change History</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +44,8 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -63,7 +65,7 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <div id="main">
 <p>
 This is a list of changes between the released versions of LuaJIT.<br>
-The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.1</strong>.<br>
+The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.4</strong>.<br>
 </p>
 <p>
 Please check the
@@ -72,6 +74,178 @@ to see whether newer versions are available.
 </p>
 
 <div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 &mdash; 2015-08-25</h2>
+<p>
+This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
+Please take a look at the commit history for more details.
+</p>
+<ul>
+<li>Changes to the VM core:
+<ul>
+<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
+<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li>
+<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li>
+<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li>
+<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li>
+<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
+</ul></li>
+<li>Improvements to the JIT compiler:
+<ul>
+<li>Add trace stitching.</li>
+<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
+<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
+<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
+<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
+<li>Compile <tt>__concat</tt> metamethod.</li>
+<li>Various minor optimizations.</li>
+</ul></li>
+<li>Internal Changes:
+<ul>
+<li>Add support for embedding LuaJIT bytecode for builtins.</li>
+<li>Replace various builtins with embedded bytecode.</li>
+<li>Refactor string buffers and string formatting.</li>
+<li>Remove obsolete non-truncating number to integer conversions.</li>
+</ul></li>
+<li>Ports:
+<ul>
+<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
+<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li>
+<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li>
+<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
+<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
+<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
+<li>PPC/e500: Drop support for this architecture.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>FFI: Add 64 bit bitwise operations.</li>
+<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li>
+<li>FFI: Compile conversions from functions to function pointers.</li>
+<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li>
+<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
+<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
+<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
+</ul></li>
+</ul>
+</div>
+
+<div class="major" style="background: #ffffd0;">
+<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 &mdash; 2015-05-14</h2>
+<ul>
+<li>Fix stack check in narrowing optimization.</li>
+<li>Fix Lua/C API typecheck error for special indexes.</li>
+<li>Fix string to number conversion.</li>
+<li>Fix lexer error for chunks without tokens.</li>
+<li>Don't compile <tt>IR_RETF</tt> after <tt>CALLT</tt> to ff with-side effects.</li>
+<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization in Lua parser.</li>
+<li>Fix corner case in string to number conversion.</li>
+<li>Gracefully handle <tt>lua_error()</tt> for a suspended coroutine.</li>
+<li>Avoid error messages when building with Clang.</li>
+<li>Fix snapshot #0 handling for traces with a stack check on entry.</li>
+<li>Fix fused constant loads under high register pressure.</li>
+<li>Invalidate backpropagation cache after DCE.</li>
+<li>Fix ABC elimination.</li>
+<li>Fix debug info for main chunk of stripped bytecode.</li>
+<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li>
+<li>Fix FOLD rule for <tt>STRREF</tt> of <tt>SNEW</tt>.</li>
+<li>Fix frame traversal while searching for error function.</li>
+<li>Prevent GC estimate miscalculation due to buffer growth.</li>
+<li>Prevent adding side traces for stack checks.</li>
+<li>Fix top slot calculation for snapshots with continuations.</li>
+<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li>
+<li>Add PS Vita port.</li>
+<li>Fix compatibility issues with Illumos.</li>
+<li>Fix DragonFly build (unsupported).</li>
+<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li>
+<li>x86: Fix argument checks for <tt>ipairs()</tt> iterator.</li>
+<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX Clang.</li>
+<li>x86: Fix code generation for unused result of <tt>math.random()</tt>.</li>
+<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and <tt>LUAJIT_USE_VALGRIND</tt>.</li>
+<li>x86/x64: Fix argument check for bit shifts.</li>
+<li>x86/x64: Fix code generation for fused test/arith ops.</li>
+<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li>
+<li>PPC: Fix red zone overflow in machine code generation.</li>
+<li>PPC: Don't use <tt>mcrxr</tt> on PPE.</li>
+<li>Various archs: Fix excess stack growth in interpreter.</li>
+<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV num.u32</tt>.</li>
+<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li>
+<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li>
+<li>FFI: Fix initialization of unions of subtypes.</li>
+<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li>
+<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt> metamethod resolution for ctypes.</li>
+<li>FFI: Fix compilation of reference field access.</li>
+<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li>
+<li>FFI: Fix recording of indexing a struct pointer ctype object itself.</li>
+<li>FFI: Allow non-scalar cdata to be compared for equality by address.</li>
+<li>FFI: Fix pseudo type conversions for type punning.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 &mdash; 2014-03-12</h2>
+<ul>
+<li>Add PS4 port.</li>
+<li>Add support for multilib distro builds.</li>
+<li>Fix OSX build.</li>
+<li>Fix MinGW build.</li>
+<li>Fix Xbox 360 build.</li>
+<li>Improve ULOAD forwarding for open upvalues.</li>
+<li>Fix GC steps threshold handling when called by JIT-compiled code.</li>
+<li>Fix argument checks for <tt>math.deg()</tt> and <tt>math.rad()</tt>.</li>
+<li>Fix <tt>jit.flush(func|true)</tt>.</li>
+<li>Respect <tt>jit.off(func)</tt> when returning to a function, too.</li>
+<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li>
+<li>Fix line number for relocated bytecode after closure fixup</li>
+<li>Fix frame traversal for backtraces.</li>
+<li>Fix ABC elimination.</li>
+<li>Fix handling of redundant PHIs.</li>
+<li>Fix snapshot restore for exit to function header.</li>
+<li>Fix type punning alias analysis for constified pointers</li>
+<li>Fix call unroll checks in the presence of metamethod frames.</li>
+<li>Fix initial maxslot for down-recursive traces.</li>
+<li>Prevent BASE register coalescing if parent uses <tt>IR_RETF</tt>.</li>
+<li>Don't purge modified function from stack slots in <tt>BC_RET</tt>.</li>
+<li>Fix recording of <tt>BC_VARG</tt>.</li>
+<li>Don't access dangling reference to reallocated IR.</li>
+<li>Fix frame depth display for bytecode dump in <tt>-jdump</tt>.</li>
+<li>ARM: Fix register allocation when rematerializing FPRs.</li>
+<li>x64: Fix store to upvalue for lightuserdata values.</li>
+<li>FFI: Add missing GC steps for callback argument conversions.</li>
+<li>FFI: Properly unload loaded DLLs.</li>
+<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li>
+<li>FFI/x64: Fix passing of vector arguments to calls.</li>
+<li>FFI: Rehash finalizer table after GC cycle, if needed.</li>
+<li>FFI: Fix <tt>cts-&gt;L</tt> for cdata unsinking in snapshot restore.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 &mdash; 2013-06-03</h2>
+<ul>
+<li>Fix memory access check for fast string interning.</li>
+<li>Fix MSVC intrinsics for older versions.</li>
+<li>Add missing GC steps for <tt>io.*</tt> functions.</li>
+<li>Fix spurious red zone overflows in machine code generation.</li>
+<li>Fix jump-range constrained mcode allocation.</li>
+<li>Inhibit DSE for implicit loads via calls.</li>
+<li>Fix builtin string to number conversion for overflow digits.</li>
+<li>Fix optional argument handling while recording builtins.</li>
+<li>Fix optional argument handling in <tt>table.concat()</tt>.</li>
+<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li>
+<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt> FOLD rule.</li>
+<li>Fix compatibility issues with Illumos.</li>
+<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li>
+<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li>
+<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler code.</li>
+<li>FFI: Fix snapshot substitution in SPLIT pass.</li>
+<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li>
+<li>FFI: Fix tailcall in lowest frame to C&nbsp;function with bool result.</li>
+<li>FFI: Ignore <tt>long</tt> type specifier in <tt>ffi.istype()</tt>.</li>
+<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).</li>
+<li>FFI: Fix calling conventions for ARM hard-float EABI (nested structs).</li>
+<li>FFI: Improve error messages for arithmetic and comparison operators.</li>
+<li>FFI: Insert no-op type conversion for pointer to integer cast.</li>
+<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li>
+<li>FFI: Must sink <tt>XBAR</tt> together with <tt>XSTORE</tt>s.</li>
+<li>FFI: Preserve intermediate string for <tt>const&nbsp;char&nbsp;*</tt> conversion.</li>
+</ul>
+
 <h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 &mdash; 2013-02-19</h2>
 <ul>
 <li>Don't clear frame for out-of-memory error.</li>
@@ -619,240 +793,11 @@ This matches the behavior of Lua 5.1, but not the specification.</li>
 no point in listing differences over earlier versions.</li>
 </ul>
 </div>
-
-<div class="major" style="background: #ffff80;">
-<h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 &mdash; 2012-04-16</h2>
-<ul>
-<li>Merged with Lua 5.1.5. Also integrated fixes for all
-<a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">&raquo;</span>&nbsp;<span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.5</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 &mdash; 2011-05-05</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 &mdash; 2010-03-28</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-<li>Removed wrong GC check in <tt>jit_createstate()</tt>.
-Thanks to Tim Mensch.</li>
-<li>Fixed bad assertions while compiling <tt>table.insert()</tt> and
-<tt>table.remove()</tt>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
-<ul>
-<li>Merged with Lua 5.1.4. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
-<ul>
-<li>Merged with Lua 5.1.3. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
-<li>Fixed possible (but unlikely) stack corruption while compiling
-<tt>k^x</tt> expressions.</li>
-<li>Fixed DynASM template for cmpss instruction.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
-<ul>
-<li>Merged with Lua 5.1.2. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
-<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
-<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
-<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
-<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
-<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
-<li>The loadable debug modules now handle redirection to stdout
-(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
-<ul>
-<li>Fix MSVC inline assembly: use only local variables with
-<tt>lua_number2int()</tt>.</li>
-<li>Fix "attempt to call a thread value" bug on Mac OS X:
-make values of consts used as lightuserdata keys unique
-to avoid joining by the compiler/linker.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
-<ul>
-<li>Merged with Lua 5.1.1. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
-<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
-<li>Minor changes to DynASM: faster pre-processing, smaller encoding
-for some immediates.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.1 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-
-<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
-<ul>
-<li>Merged with Lua 5.1 (final).</li>
-
-<li>New JIT call frame setup:
-<ul>
-<li>The C stack is kept 16 byte aligned (faster).
-Mandatory for Mac OS X on Intel, too.</li>
-<li>Faster calling conventions for internal C helper functions.</li>
-<li>Better instruction scheduling for function prologue, OP_CALL and
-OP_RETURN.</li>
-</ul></li>
-
-<li>Miscellaneous optimizations:
-<ul>
-<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
-forwarding stalls.</li>
-<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
-and FP to integer conversions.</li>
-<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
-<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
-With better accuracy than the C variant, too.</li>
-<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
-use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
-</ul></li>
-
-<li>Changes in the optimizer:
-<ul>
-<li>Improved hinting for table keys derived from table values
-(<tt>t1[t2[x]]</tt>).</li>
-<li>Lookup hinting now works with arbitrary object types and
-supports index chains, too.</li>
-<li>Generate type hints for arithmetic and comparison operators,
-OP_LEN, OP_CONCAT and OP_FORPREP.</li>
-<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
-<li>Complete rewrite of <tt>jit.opt_inline</tt> module
-(ex <tt>jit.opt_lib</tt>).</li>
-</ul></li>
-
-<li>Use adaptive deoptimization:
-<ul>
-<li>If runtime verification of a contract fails, the affected
-instruction is recompiled and patched on-the-fly.
-Regular programs will trigger deoptimization only occasionally.</li>
-<li>This avoids generating code for uncommon fallback cases
-most of the time. Generated code is up to 30% smaller compared to
-LuaJIT&nbsp;1.0.3.</li>
-<li>Deoptimization is used for many opcodes and contracts:
-<ul>
-<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
-<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
-<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
-<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
-OP_FORPREP: operand type and range mismatches.</li>
-</ul></li>
-<li>Complete redesign of the debug and traceback info
-(bytecode &harr; mcode) to support deoptimization.
-Much more flexible and needs only 50% of the space.</li>
-<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
-<tt>jit.dump</tt> handle deoptimization.</li>
-</ul></li>
-
-<li>Inlined many popular library functions
-(for commonly used arguments only):
-<ul>
-<li>Most <tt>math.*</tt> functions (the 18 most used ones)
-[2x-10x faster].</li>
-<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
-[2x-10x faster].</li>
-<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
-[3x-5x faster].</li>
-<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
-[3x-5x faster].</li>
-<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
-[8x-15x faster].</li>
-</ul></li>
-
-<li>Changes in the core and loadable modules and the stand-alone executable:
-<ul>
-<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
-and <tt>jit.arch</tt>.</li>
-<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
-<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
-<li>New x86 disassembler module written in pure Lua. No dependency
-on ndisasm anymore. Flexible API, very compact (500 lines)
-and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
-<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
-on a separate line.</li>
-</ul></li>
-
-<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
-<li>Miscellaneous doc changes. Added a section about
-<a href="install.html#embedding">embedding LuaJIT</a>.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.0 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-</div>
-
-<div class="major" style="background: #ffffd0;">
-<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
-<ul>
-<li>Even more docs.</li>
-<li>Unified closure checks in <tt>jit.*</tt>.</li>
-<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
-<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
-<li>Merged with Lua 5.1 alpha (including early bug fixes).</li>
-</ul>
-<p>
-This is the first public release of LuaJIT.
-</p>
-
-<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
-<ul>
-<li>Add support for flushing the Valgrind translation cache <br>
-(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
-<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
-variant for POSIX systems.</li>
-<li>Reorganized the C&nbsp;function signature handling in
-<tt>jit.opt_lib</tt>.</li>
-<li>Changed to index-based hints for inlining C&nbsp;functions.
-Still no support in the backend for inlining.</li>
-<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
-<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
-<li>Misc. changes to the Makefiles.</li>
-<li>Lots of new docs.</li>
-<li>Complete doc reorg.</li>
-</ul>
-<p>
-Not released because Lua 5.1 alpha came out today.
-</p>
-
-<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
-<ul>
-<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
-<li>Fix result handling for C &ndash;> JIT calls.</li>
-<li>Detect CPU feature bits.</li>
-<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
-<li>Add fallback instructions for FP compares.</li>
-<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
-<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
-(David Burgess).</li>
-<li>Misc. doc updates.</li>
-</ul>
-<p>
-Interim non-public release.
-Special thanks to Adam D. Moss for reporting most of the bugs.
-</p>
-
-<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
-<p>
-This is the initial non-public release of LuaJIT.
-</p>
-</div>
 <br class="flush">
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 5 - 3
jni/LuaJIT-2.0.1/doc/contact.html → jni/LuaJIT-2.1/doc/contact.html

@@ -4,7 +4,7 @@
 <title>Contact</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -84,7 +86,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
 <h2>Copyright</h2>
 <p>
 All documentation is
-Copyright &copy; 2005-2013 Mike Pall.
+Copyright &copy; 2005-2015 Mike Pall.
 </p>
 
 
@@ -92,7 +94,7 @@ Copyright &copy; 2005-2013 Mike Pall.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 4 - 2
jni/LuaJIT-2.0.1/doc/ext_c_api.html → jni/LuaJIT-2.1/doc/ext_c_api.html

@@ -4,7 +4,7 @@
 <title>Lua/C API Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -177,7 +179,7 @@ Also note that this mechanism is not without overhead.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 4 - 2
jni/LuaJIT-2.0.1/doc/ext_ffi.html → jni/LuaJIT-2.1/doc/ext_ffi.html

@@ -4,7 +4,7 @@
 <title>FFI Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -320,7 +322,7 @@ without undue conversion penalties.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 8 - 4
jni/LuaJIT-2.0.1/doc/ext_ffi_api.html → jni/LuaJIT-2.1/doc/ext_ffi_api.html

@@ -4,7 +4,7 @@
 <title>ffi.* API Functions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -46,6 +46,8 @@ td.abiparam { font-weight: bold; width: 6em; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -466,6 +468,8 @@ otherwise. The following parameters are currently defined:
 <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+<tr class="even">
+<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 
 <h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -542,8 +546,8 @@ corresponding ctype.
 The parser for Lua source code treats numeric literals with the
 suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
 integers. Case doesn't matter, but uppercase is recommended for
-readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
-(<tt>0x2aLL</tt>) literals.
+readability. It handles decimal (<tt>42LL</tt>), hexadecimal
+(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
 </p>
 <p>
 The imaginary part of complex numbers can be specified by suffixing
@@ -556,7 +560,7 @@ named <tt>i</tt>.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 27 - 7
jni/LuaJIT-2.0.1/doc/ext_ffi_semantics.html → jni/LuaJIT-2.1/doc/ext_ffi_semantics.html

@@ -4,7 +4,7 @@
 <title>FFI Semantics</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -46,6 +46,8 @@ td.convop { font-style: italic; width: 40%; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -183,6 +185,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
 <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
 <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
 
+<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
+
 </ul>
 <p>
 You're encouraged to use these types in preference to
@@ -730,6 +734,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
 number (e.g. for regular floating-point calculations) with
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 
+<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
+arithmetic operators apply analogously.<br>
+
+Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
+converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
+returns a Lua number.<br>
+
+For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
+conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
+<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number as a shift count for
+shifts and rotates is accepted, but that alone does <em>not</em> cause
+a cdata number output.
+
 </ul>
 
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1188,7 +1208,9 @@ storing and initializing them are supported, yet.</li>
 <li>The <tt>volatile</tt> type qualifier is currently ignored by
 compiled code.</li>
 <li><a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> silently
-ignores all re-declarations.</li>
+ignores most re-declarations. Note: avoid re-declarations which do not
+conform to C99. The implementation will eventually be changed to
+perform strict checks.</li>
 </ul>
 <p>
 The JIT compiler already handles a large subset of all FFI operations.
@@ -1203,9 +1225,8 @@ suboptimal performance, especially when used in inner loops:
 <li>Vector operations.</li>
 <li>Table initializers.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
-<li>Allocations of variable-length arrays or structs.</li>
-<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an
-alignment &gt; 8&nbsp;bytes.</li>
+<li>Non-default initialization of VLA/VLS or large C&nbsp;types
+(&gt; 128&nbsp;bytes or &gt; 16 array elements.</li>
 <li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
 <li>Pointer differences for element sizes that are not a power of
 two.</li>
@@ -1222,7 +1243,6 @@ value.</li>
 Other missing features:
 </p>
 <ul>
-<li>Bit operations for 64&nbsp;bit types.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
@@ -1233,7 +1253,7 @@ compiled.</li>
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 5 - 3
jni/LuaJIT-2.0.1/doc/ext_ffi_tutorial.html → jni/LuaJIT-2.1/doc/ext_ffi_tutorial.html

@@ -4,7 +4,7 @@
 <title>FFI Tutorial</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -48,6 +48,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -564,7 +566,7 @@ Thus it's not helpful and actually counter-productive to cache
 individual C&nbsp;functions like this:
 </p>
 <pre class="code">
-local <b>funca</b>, <b>funcb</b> = ffi.C.funcb, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span>
+local <b>funca</b>, <b>funcb</b> = ffi.C.funca, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span>
 local function foo(x, n)
   for i=1,n do <b>funcb</b>(<b>funca</b>(x, i), 1) end
 end
@@ -591,7 +593,7 @@ it to a local variable in the function scope is unnecessary.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 5 - 3
jni/LuaJIT-2.0.1/doc/ext_jit.html → jni/LuaJIT-2.1/doc/ext_jit.html

@@ -4,7 +4,7 @@
 <title>jit.* Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -151,7 +153,7 @@ Contains the target OS name:
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <p>
 Contains the target architecture name:
-"x86", "x64" or "ppcspe".
+"x86", "x64", "arm", "ppc", or "mips".
 </p>
 
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
@@ -189,7 +191,7 @@ if you want to know more.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 365 - 0
jni/LuaJIT-2.1/doc/ext_profiler.html

@@ -0,0 +1,365 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Profiler</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Profiler</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a class="current" href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has an integrated statistical profiler with very low overhead. It
+allows sampling the currently executing stack and other parameters in
+regular intervals.
+</p>
+<p>
+The integrated profiler can be accessed from three levels:
+</p>
+<ul>
+<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
+<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
+<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
+<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
+</ul>
+
+<h2 id="hl_profiler">High-Level Profiler</h2>
+<p>
+The bundled high-level profiler offers basic profiling functionality. It
+generates simple textual summaries or source code annotations. It can be
+accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
+or from Lua code by loading the underlying <tt>jit.p</tt> module.
+</p>
+<p>
+To cut to the chase &mdash; run this to get a CPU usage profile by
+function name:
+</p>
+<pre class="code">
+luajit -jp myapp.lua
+</pre>
+<p>
+It's <em>not</em> a stated goal of the bundled profiler to add every
+possible option or to cater for special profiling needs. The low-level
+profiler APIs are documented below. They may be used by third-party
+authors to implement advanced functionality, e.g. IDE integration or
+graphical profilers.
+</p>
+<p>
+Note: Sampling works for both interpreted and JIT-compiled code. The
+results for JIT-compiled code may sometimes be surprising. LuaJIT
+heavily optimizes and inlines Lua code &mdash; there's no simple
+one-to-one correspondence between source code lines and the sampled
+machine code.
+</p>
+
+<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
+<p>
+The <tt>-jp</tt> command line option starts the high-level profiler.
+When the application run by the command line terminates, the profiler
+stops and writes the results to <tt>stdout</tt> or to the specified
+<tt>output</tt> file.
+</p>
+<p>
+The <tt>options</tt> argument specifies how the profiling is to be
+performed:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
+This is the default mode.</li>
+<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
+<li><tt>&lt;number&gt;</tt> &mdash; stack dump depth (callee &larr;
+caller). Default: 1.</li>
+<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
+&rarr; callee).</li>
+<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
+depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
+<li><tt>p</tt> &mdash; Show full path for module names.</li>
+<li><tt>v</tt> &mdash; Show VM states.</li>
+<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
+<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
+<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
+<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
+<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
+<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
+Default: 3%.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
+Default: 10ms.<br>
+Note: The actual sampling precision is OS-dependent.</li>
+</ul>
+<p>
+The default output for <tt>-jp</tt> is a list of the most CPU consuming
+spots in the application. Increasing the stack dump depth with (say)
+<tt>-jp=2</tt> may help to point out the main callers or callees of
+hotspots. But sample aggregation is still flat per unique stack dump.
+</p>
+<p>
+To get a two-level view (split view) of callers/callees, use
+<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
+level are relative to the first level.
+</p>
+<p>
+To see how much time is spent in each line relative to a function, use
+<tt>-jp=fl</tt>.
+</p>
+<p>
+To see how much time is spent in different VM states or
+<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
+</p>
+<p>
+Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
+views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
+spent in a VM state or zone vs. hotspots. This can be used to answer
+questions like "Which time consuming functions are only interpreted?" or
+"What's the garbage collector overhead for a specific function?".
+</p>
+<p>
+Multiple options can be combined &mdash; but not all combinations make
+sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
+deep in 4ms intervals and shows a split view of the CPU consuming
+functions and their callers with a 1% threshold.
+</p>
+<p>
+Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
+always flat and at the line level. Obviously, the source code files need
+to be readable by the profiler script.
+</p>
+<p>
+The high-level profiler can also be started and stopped from Lua code with:
+</p>
+<pre class="code">
+require("jit.p").start(options, output)
+...
+require("jit.p").stop()
+</pre>
+
+<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
+<p>
+Zones can be used to provide information about different parts of an
+application to the high-level profiler. E.g. a game could make use of an
+<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
+organized as a stack.
+</p>
+<p>
+The <tt>jit.zone</tt> module needs to be loaded explicitly:
+</p>
+<pre class="code">
+local zone = require("jit.zone")
+</pre>
+<ul>
+<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
+<li><tt>zone()</tt> pops the current zone from the zone stack and
+returns its name.</li>
+<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
+<li><tt>zone:flush()</tt> flushes the zone stack.</li>
+</ul>
+<p>
+To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
+spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
+</p>
+
+<h2 id="ll_lua_api">Low-level Lua API</h2>
+<p>
+The <tt>jit.profile</tt> module gives access to the low-level API of the
+profiler from Lua code. This module needs to be loaded explicitly:
+<pre class="code">
+local profile = require("jit.profile")
+</pre>
+<p>
+This module can be used to implement your own higher-level profiler.
+A typical profiling run starts the profiler, captures stack dumps in
+the profiler callback, adds them to a hash table to aggregate the number
+of samples, stops the profiler and then analyzes all of the captured
+stack dumps. Other parameters can be sampled in the profiler callback,
+too. But it's important not to spend too much time in the callback,
+since this may skew the statistics.
+</p>
+
+<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. The <tt>mode</tt> argument is a
+string holding options:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
+<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
+10ms).</br>
+Note: The actual sampling precision is OS-dependent.
+</li>
+</ul>
+<p>
+The <tt>cb</tt> argument is a callback function which is called with
+three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
+called on a separate coroutine, the <tt>thread</tt> argument is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify the stack of that state or call functions on it.
+</p>
+<p>
+<tt>samples</tt> gives the number of accumulated samples since the last
+callback (usually 1).
+</p>
+<p>
+<tt>vmstate</tt> holds the VM state at the time the profiling timer
+triggered. This may or may not correspond to the state of the VM when
+the profiling callback is called. The state is either <tt>'N'</tt>
+native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
+C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
+compiler.
+</p>
+
+<h3 id="profile_stop"><tt>profile.stop()</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner. It
+returns a string with a stack dump for the <tt>thread</tt> (coroutine),
+formatted according to the <tt>fmt</tt> argument:
+</p>
+<ul>
+<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
+only the file name is used.</li>
+<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
+use module:line.</li>
+<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Dump module:line.</li>
+<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
+frame.</li>
+<li>All other characters are added verbatim to the output string.</li>
+</ul>
+<p>
+The <tt>depth</tt> argument gives the number of frames to dump, starting
+at the topmost frame of the thread. A negative number dumps the frames in
+inverse order.
+</p>
+<p>
+The first example prints a list of the current module names and line
+numbers of up to 10 frames in separate lines. The second example prints
+semicolon-separated function names for all frames (up to 100) in inverse
+order:
+</p>
+<pre class="code">
+print(profile.dumpstack(thread, "l\n", 10))
+print(profile.dumpstack(thread, "lZ;", -100))
+</pre>
+
+<h2 id="ll_c_api">Low-level C API</h2>
+<p>
+The profiler can be controlled directly from C&nbsp;code, e.g. for
+use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
+<a href="ext_c_api.html">Lua/C API</a> extensions).
+</p>
+
+<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. <a href="#profile_start">See
+above</a> for a description of the <tt>mode</tt> argument.
+</p>
+<p>
+The <tt>cb</tt> argument is a callback function with the following
+declaration:
+</p>
+<pre class="code">
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+                                        int samples, int vmstate);
+</pre>
+<p>
+<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify this stack or call functions on this stack &mdash;
+use a separate coroutine for this purpose. <a href="#profile_start">See
+above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
+</p>
+
+<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner.
+<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
+and <tt>depth</tt>.
+</p>
+<p>
+This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
+private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
+argument returns the length of the output string. The buffer is
+overwritten on the next call and deallocated when the profiler stops.
+You either need to consume the content immediately or copy it for later
+use.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 53 - 5
jni/LuaJIT-2.0.1/doc/extensions.html → jni/LuaJIT-2.1/doc/extensions.html

@@ -4,7 +4,7 @@
 <title>Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -58,6 +58,8 @@ td.excinterop {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -113,6 +115,9 @@ bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
 This module is a LuaJIT built-in &mdash; you don't need to download or
 install Lua BitOp. The Lua BitOp site has full documentation for all
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+The FFI adds support for
+<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
+using the same API functions.
 </p>
 <p>
 Please make sure to <tt>require</tt> the module before using any of
@@ -146,6 +151,11 @@ LuaJIT adds some
 <a href="ext_c_api.html">extra functions to the Lua/C API</a>.
 </p>
 
+<h3 id="profiler">Profiler</h3>
+<p>
+LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
+</p>
+
 <h2 id="library">Enhanced Standard Library Functions</h2>
 
 <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -173,7 +183,7 @@ in <tt>"-inf"</tt>.
 <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
 <p>
 All string-to-number conversions consistently convert integer and
-floating-point inputs in decimal and hexadecimal on all platforms.
+floating-point inputs in decimal, hexadecimal and binary on all platforms.
 <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
 problems with poor C library implementations. The builtin conversion
 function provides full precision according to the IEEE-754 standard, it
@@ -197,6 +207,36 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
 bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 </p>
+<p>
+Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
+a different, incompatible bytecode format for ports that use this mode (e.g.
+ARM64). This may be rectified in the future.
+</p>
+
+<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
+<p>
+An extra library function <tt>table.new()</tt> can be made available via
+<tt>require("table.new")</tt>. This creates a pre-sized table, just like
+the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
+tables if the final table size is known and automatic table resizing is
+too expensive.
+</p>
+
+<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
+<p>
+An extra library function <tt>table.clear()</tt> can be made available
+via <tt>require("table.clear")</tt>. This clears all keys and values
+from a table, but preserves the allocated array/hash sizes. This is
+useful when a table, which is linked from multiple places, needs to be
+cleared and/or when recycling a table for use by the same context. This
+avoids managing backlinks, saves an allocation and the overhead of
+incremental array/hash part growth.
+</p>
+<p>
+Please note this function is meant for very specific situations. In most
+cases it's better to replace the (usually single) link with a new table
+and let the GC do its work.
+</p>
 
 <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
 <p>
@@ -248,8 +288,9 @@ enabled:
 <ul>
 <li><tt>goto</tt> and <tt>::labels::</tt>.</li>
 <li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
-<li><tt>load(string|reader, chunkname [,mode [,env]])</tt>.
-<tt>loadstring()</tt> is an alias.</li>
+<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
+<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
+<li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
 <li><tt>math.log(x [,base])</tt>.
 <li><tt>string.rep(s, n [,sep])</tt>.
 <li><tt>string.format()</tt>: <tt>%q</tt> reversible.
@@ -303,6 +344,13 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
 break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
 </p>
 
+<h2 id="lua53">Extensions from Lua 5.3</h2>
+<p>
+LuaJIT supports some extensions from Lua&nbsp;5.3:
+<ul>
+<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
+</ul>
+
 <h2 id="exceptions">C++ Exception Interoperability</h2>
 <p>
 LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -397,7 +445,7 @@ lead to the termination of the process.</li>
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 4 - 2
jni/LuaJIT-2.0.1/doc/faq.html → jni/LuaJIT-2.1/doc/faq.html

@@ -4,7 +4,7 @@
 <title>Frequently Asked Questions (FAQ)</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +44,8 @@ dd { margin-left: 1.5em; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -174,7 +176,7 @@ the development of certain features, if they are important to you.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 0 - 0
jni/LuaJIT-2.0.1/doc/img/contact.png → jni/LuaJIT-2.1/doc/img/contact.png


+ 92 - 42
jni/LuaJIT-2.0.1/doc/install.html → jni/LuaJIT-2.1/doc/install.html

@@ -4,7 +4,7 @@
 <title>Installation</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -69,6 +69,8 @@ td.compatno {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -112,38 +114,38 @@ operating systems, CPUs and compilers:
 </tr>
 <tr class="odd separate">
 <td class="compatcpu">x86 (32 bit)</td>
-<td class="compatos">GCC 4.x<br>GCC 3.4</td>
-<td class="compatos">GCC 4.x<br>GCC 3.4</td>
-<td class="compatos">GCC 4.x<br>GCC 3.4</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
 </tr>
 <tr class="even">
 <td class="compatcpu">x64 (64 bit)</td>
-<td class="compatos">GCC 4.x</td>
-<td class="compatos compatno">&nbsp;</td>
-<td class="compatos">GCC 4.x</td>
-<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
+<td class="compatos">XCode 5.0+<br>Clang</td>
+<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
 </tr>
 <tr class="odd">
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
 <td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td>
+<td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos compatno">&nbsp;</td>
 </tr>
 <tr class="even">
-<td class="compatcpu"><a href="#cross2">PPC</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
+<td class="compatcpu"><a href="#cross2">ARM64</a></td>
+<td class="compatos">GCC 4.8+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 <tr class="odd">
-<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>
-<td class="compatos">GCC 4.3+</td>
+<td class="compatcpu"><a href="#cross2">PPC</a></td>
 <td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 <tr class="even">
 <td class="compatcpu"><a href="#cross2">MIPS</a></td>
@@ -188,8 +190,8 @@ open a terminal window and change to this directory. Now unpack the archive
 and change to the newly created directory:
 </p>
 <pre class="code">
-tar zxf LuaJIT-2.0.1.tar.gz
-cd LuaJIT-2.0.1</pre>
+tar zxf LuaJIT-2.0.4.tar.gz
+cd LuaJIT-2.0.4</pre>
 <h3>Building LuaJIT</h3>
 <p>
 The supplied Makefiles try to auto-detect the settings needed for your
@@ -383,10 +385,11 @@ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
 # ARM hard-float ABI with VFP (armhf, requires recent toolchain)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 
+# ARM64 (requires x64 host)
+make CROSS=aarch64-linux-
+
 # PPC
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-# PPC/e500v2 (fast interpreter only)
-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
 
 # MIPS big-endian
 make HOST_CC="gcc -m32" CROSS=mips-linux-
@@ -439,8 +442,7 @@ NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 </pre>
 <p>
-You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>.
-The environment variables need to match the iOS SDK version:
+You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
 </p>
 <p style="font-size: 8pt;">
 Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
@@ -450,38 +452,84 @@ much slower than the JIT compiler. Please complain to Apple, not me.
 Or use Android. :-p
 </p>
 <pre class="code">
-IXCODE=`xcode-select -print-path`
-ISDK=$IXCODE/Platforms/iPhoneOS.platform/Developer
-ISDKVER=iPhoneOS6.0.sdk
-ISDKP=$ISDK/usr/bin/
-ISDKF="-arch armv7 -isysroot $ISDK/SDKs/$ISDKVER"
-make HOST_CC="gcc -m32 -arch i386" CROSS=$ISDKP TARGET_FLAGS="$ISDKF" \
-     TARGET_SYS=iOS
+# iOS/ARM (32 bit)
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch armv7 -isysroot $ISDKP"
+make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \
+     TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+
+# iOS/ARM64
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch arm64 -isysroot $ISDKP"
+make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
 </pre>
+
+<h3 id="consoles">Cross-compiling for consoles</h3>
+<p>
+Building LuaJIT for consoles requires both a supported host compiler
+(x86 or x64) and a cross-compiler (to PPC or ARM) from the official
+console SDK.
+</p>
+<p>
+Due to restrictions on consoles, the JIT compiler is disabled and only
+the fast interpreter is built. This is still faster than plain Lua,
+but much slower than the JIT compiler. The FFI is disabled, too, since
+it's not very useful in such an environment.
+</p>
+<p>
+The following commands build a static library <tt>libluajit.a</tt>,
+which can be linked against your game, just like the Lua library.
+</p>
 <p>
-You can cross-compile for <b id="ps3">PS3</b> using the PS3&nbsp;SDK from
-a Linux host or a Windows host (requires 32 bit MinGW (GCC) on the host,
-too). Due to restrictions on consoles, the JIT compiler is disabled and
-only the fast interpreter is built:
+To cross-compile for <b id="ps3">PS3</b> from a Linux host (requires
+32&nbsp;bit GCC, i.e. multilib Linux/x64) or a Windows host (requires
+32&nbsp;bit MinGW), run this command:
 </p>
 <pre class="code">
 make HOST_CC="gcc -m32" CROSS=ppu-lv2-
 </pre>
 <p>
-You can cross-compile for <b id="xbox360">Xbox 360</b> using the
-Xbox&nbsp;360 SDK (MSVC + XEDK). Due to restrictions on consoles, the
-JIT compiler is disabled and only the fast interpreter is built.
+To cross-compile for <b id="ps4">PS4</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and
+run the following commands:
 </p>
+<pre class="code">
+cd src
+ps4build
+</pre>
 <p>
-Open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
+To cross-compile for <b id="psvita">PS Vita</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and
+run the following commands:
+</p>
+<pre class="code">
+cd src
+psvitabuild
+</pre>
+<p>
+To cross-compile for <b id="xbox360">Xbox 360</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
 <tt>cd</tt> to the directory where you've unpacked the sources and run
-the following commands. This builds a static library <tt>luajit20.lib</tt>,
-which can be linked against your game, just like the Lua library.
+the following commands:
 </p>
 <pre class="code">
 cd src
 xedkbuild
 </pre>
+<p>
+To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and run
+the following commands:
+</p>
+<pre class="code">
+cd src
+xb1build
+</pre>
 
 <h2 id="embed">Embedding LuaJIT</h2>
 <p>
@@ -565,9 +613,11 @@ for a regular distribution build:
 <ul>
 <li><tt>PREFIX</tt> overrides the installation path and should usually
 be set to <tt>/usr</tt>. Setting this also changes the module paths and
-the <tt>-rpath</tt> of the shared library.</li>
+the paths needed to locate the shared library.</li>
 <li><tt>DESTDIR</tt> is an absolute path which allows you to install
 to a shadow tree instead of the root tree of the build system.</li>
+<li><tt>MULTILIB</tt> sets the architecture-specific library path component
+for multilib systems. The default is <tt>lib</tt>.</li>
 <li>Have a look at the top-level <tt>Makefile</tt> and <tt>src/Makefile</tt>
 for additional variables to tweak. The following variables <em>may</em> be
 overridden, but it's <em>not</em> recommended, except for special needs
@@ -603,7 +653,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 13 - 5
jni/LuaJIT-2.0.1/doc/luajit.html → jni/LuaJIT-2.1/doc/luajit.html

@@ -4,7 +4,7 @@
 <title>LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -38,6 +38,9 @@ table.os1 td {
 table.os2 td {
   color: #ffa040;
 }
+table.os3 td {
+  color: #40ffff;
+}
 table.compiler td {
   color: #2080ff;
   background: #62bf41;
@@ -123,6 +126,8 @@ table.feature small {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -147,7 +152,7 @@ Lua is a powerful, dynamic and light-weight programming language.
 It may be embedded or used as a general-purpose, stand-alone language.
 </p>
 <p>
-LuaJIT is Copyright &copy; 2005-2013 Mike Pall, released under the
+LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
 <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 </p>
 <p>
@@ -158,13 +163,16 @@ LuaJIT is Copyright &copy; 2005-2013 Mike Pall, released under the
 <tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr>
 </table>
 <table class="feature os os2">
-<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td><td>PS3</td><td>Xbox 360</td></tr>
+<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
+</table>
+<table class="feature os os3">
+<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
 </table>
 <table class="feature compiler">
 <tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
 </table>
 <table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
 </table>
 <table class="feature fcompat">
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
@@ -218,7 +226,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 6 - 3
jni/LuaJIT-2.0.1/doc/running.html → jni/LuaJIT-2.1/doc/running.html

@@ -4,7 +4,7 @@
 <title>Running LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -63,6 +63,8 @@ td.param_default {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -178,6 +180,7 @@ Here are the available LuaJIT control commands:
 <li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
 <li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
 <li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+<li id="j_p"><tt>-jp</tt> &mdash; Start the <a href="ext_profiler.html">integrated profiler</a>.</li>
 </ul>
 <p>
 The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
@@ -186,7 +189,7 @@ itself. For a description of their options and output format, please
 read the comment block at the start of their source.
 They can be found in the <tt>lib</tt> directory of the source
 distribution or installed under the <tt>jit</tt> directory. By default
-this is <tt>/usr/local/share/luajit-2.0.1/jit</tt> on POSIX
+this is <tt>/usr/local/share/luajit-2.0.4/jit</tt> on POSIX
 systems.
 </p>
 
@@ -296,7 +299,7 @@ Here are the parameters and their default settings:
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 6 - 13
jni/LuaJIT-2.0.1/doc/status.html → jni/LuaJIT-2.1/doc/status.html

@@ -1,10 +1,10 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
 <head>
-<title>Status &amp; Roadmap</title>
+<title>Status</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -17,7 +17,7 @@ ul li { padding-bottom: 0.3em; }
 <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
 </div>
 <div id="head">
-<h1>Status &amp; Roadmap</h1>
+<h1>Status</h1>
 </div>
 <div id="nav">
 <ul><li>
@@ -44,6 +44,8 @@ ul li { padding-bottom: 0.3em; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a class="current" href="status.html">Status</a>
@@ -102,20 +104,11 @@ handled correctly. The error may fall through an on-trace
 garbage collector.
 </li>
 </ul>
-
-<h2>Roadmap</h2>
-<p>
-Please refer to the
-<a href="http://www.freelists.org/post/luajit/LuaJIT-Roadmap-20122013"><span class="ext">&raquo;</span>&nbsp;LuaJIT Roadmap 2012/2013</a> and an
-<a href="http://www.freelists.org/post/luajit/LuaJIT-Roadmap-20122013-UPDATE"><span class="ext">&raquo;</span>&nbsp;update on release planning</a> for details.
-</p>
-<p>
-</p>
 <br class="flush">
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>

+ 3 - 2
jni/LuaJIT-2.0.1/dynasm/dasm_arm.h → jni/LuaJIT-2.1/dynasm/dasm_arm.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM ARM encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -211,7 +211,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
-	if (n >= 0) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl += 10; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;

+ 9 - 6
jni/LuaJIT-2.0.1/dynasm/dasm_arm.lua → jni/LuaJIT-2.1/dynasm/dasm_arm.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM ARM module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -9,9 +9,9 @@
 local _info = {
   arch =	"arm",
   description =	"DynASM ARM module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -923,19 +923,22 @@ local function parse_template(params, template, nparams, pos)
 end
 
 map_op[".template__"] = function(params, template, nparams)
-  if not params then return sub(template, 9) end
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- A single opcode needs a maximum of 3 positions.
   if secpos+3 > maxsecpos then wflush() end
   local pos = wpos()
-  local apos, spos = #actargs, secpos
+  local lpos, apos, spos = #actlist, #actargs, secpos
 
   local ok, err
   for t in gmatch(template, "[^|]+") do
     ok, err = pcall(parse_template, params, t, nparams, pos)
     if ok then return end
     secpos = spos
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
     actargs[apos+1] = nil
     actargs[apos+2] = nil
     actargs[apos+3] = nil

+ 518 - 0
jni/LuaJIT-2.1/dynasm/dasm_arm64.h

@@ -0,0 +1,518 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)
+{
+  if ((n >> 12) == 0)
+    return n;
+  else if ((n & 0xff000fff) == 0)
+    return (n >> 12) | 0x1000;
+  else
+    return -1;
+}
+
+static int dasm_ffs(unsigned long long x)
+{
+  int n = -1;
+  while (x) { x >>= 1; n++; }
+  return n;
+}
+
+static int dasm_imm13(int lo, int hi)
+{
+  int inv = 0, w = 64, s = 0xfff, xa, xb;
+  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+  unsigned long long m = 1ULL, a, b, c;
+  if (n & 1) { n = ~n; inv = 1; }
+  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
+  xa = dasm_ffs(a); xb = dasm_ffs(b);
+  if (c) {
+    w = dasm_ffs(c) - xa;
+    if (w == 32) m = 0x0000000100000001UL;
+    else if (w == 16) m = 0x0001000100010001UL;
+    else if (w == 8) m = 0x0101010101010101UL;
+    else if (w == 4) m = 0x1111111111111111UL;
+    else if (w == 2) m = 0x5555555555555555UL;
+    else return -1;
+    s = (-2*w & 0x3f) - 1;
+  } else if (!a) {
+    return -1;
+  } else if (xb == -1) {
+    xb = 64;
+  }
+  if ((b-a) * m != n) return -1;
+  if (inv) {
+    return ((w - xb) << 6) | (s+w+xa-xb);
+  } else {
+    return ((w - xa) << 6) | (s+xb-xa);
+  }
+  return -1;
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);
+    if (action >= DASM__MAX) {
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if ((ins & 0x8000))
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMM6:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM12:
+	CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13W:
+	CK(dasm_imm13(n, n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13X: {
+	int m = va_arg(ap, int);
+	CK(dasm_imm13(n, m) != -1, RANGE_I);
+	b[pos++] = n;
+	b[pos++] = m;
+	break;
+	}
+      case DASM_IMML: {
+#ifdef DASM_CHECKS
+	int scale = (p[-2] >> 30);
+	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+	   (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+	}
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+	case DASM_IMML: pos++; break;
+	case DASM_IMM13X: pos += 2; break;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+	  goto patchrel;
+	case DASM_ALIGN:
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+	patchrel:
+	  if (!(ins & 0xf800)) {  /* B, BL */
+	    CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+	    cp[-1] |= ((n >> 2) & 0x03ffffff);
+	  } else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
+	    CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0);
+	  } else if ((ins & 0x3000) == 0x2000) {  /* ADR */
+	    CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+	  } else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
+	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	case DASM_IMM6:
+	  cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+	  break;
+	case DASM_IMM12:
+	  cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+	  break;
+	case DASM_IMM13W:
+	  cp[-1] |= (dasm_imm13(n, n) << 10);
+	  break;
+	case DASM_IMM13X:
+	  cp[-1] |= (dasm_imm13(n, *b++) << 10);
+	  break;
+	case DASM_IMML: {
+	  int scale = (p[-2] >> 30);
+	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+	  break;
+	  }
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1166 - 0
jni/LuaJIT-2.1/dynasm/dasm_arm64.lua

@@ -0,0 +1,1166 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+  arch =	"arm",
+  description =	"DynASM ARM64 module",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(w * 0x10000 + (val or 0))
+  if a then actargs[#actargs+1] = a end
+  if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+  if n <= 0x000fffff then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = ""
+  return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  if n <= 0x000fffff then
+    insert(actlist, pos+1, n)
+    n = map_action.ESC * 0x10000
+  end
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n
+  return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("Global labels:\n")
+  for i=20,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("enum {\n")
+  for i=20,next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n
+  map_extern_[n] = name
+  return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0,next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0,next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+  return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+
+local map_extend = {
+  uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+  sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+}
+
+local map_cond = {
+  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+  hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+local parse_reg_type
+
+local function parse_reg(expr)
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then
+      if not parse_reg_type then
+	parse_reg_type = rt
+      elseif parse_reg_type ~= rt then
+	werror("register size mismatch")
+      end
+      return r, tp
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_reg_base(expr)
+  if expr == "sp" then return 0x3e0 end
+  local base, tp = parse_reg(expr)
+  if parse_reg_type ~= "x" then werror("bad register type") end
+  parse_reg_type = false
+  return shl(base, 5), tp
+end
+
+local parse_ctx = {}
+
+local loadenv = setfenv and function(s)
+  local code = loadstring(s, "")
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s)
+  return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+local function parse_number(n)
+  local x = tonumber(n)
+  if x then return x end
+  local code = loadenv("return "..n)
+  if code then
+    local ok, y = pcall(code)
+    if ok then return y end
+  end
+  return nil
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n then
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+local function parse_imm12(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    if shr(n, 12) == 0 then
+      return shl(n, 10)
+    elseif band(n, 0xff000fff) == 0 then
+      return shr(n, 2) + 0x00400000
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM12", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm13(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  local r64 = parse_reg_type == "x"
+  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
+    local inv = false
+    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
+    local t = {}
+    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
+    local b = table.concat(t)
+    b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
+    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+    if p0 then
+      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
+      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
+	local s = band(-2*w, 0x3f) - 1
+	if w == 64 then s = s + 0x1000 end
+	if inv then
+	  return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+	else
+	  return shl(w-#p0, 16) + shl(s+#p1, 10)
+	end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif r64 then
+    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
+    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
+    return 0
+  else
+    waction("IMM13W", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm6(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    if n >= 0 and n <= 63 then
+      return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM6", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm_load(imm, scale)
+  local n = parse_number(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n and m >= 0 and m < 0x1000 then
+      return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
+    elseif n >= -256 and n < 256 then
+      return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMML", 0, imm)
+    return 0
+  end
+end
+
+local function parse_fpimm(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m, e = math.frexp(n)
+    local s, e2 = 0, band(e-2, 7)
+    if m < 0 then m = -m; s = 0x00100000 end
+    m = m*32-16
+    if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
+      return s + shl(e2, 17) + shl(m, 13)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    werror("NYI fpimm action")
+  end
+end
+
+local function parse_shift(expr)
+  local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+  s = map_shift[s]
+  if not s then werror("expected shift operand") end
+  return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
+end
+
+local function parse_lslx16(expr)
+  local n = match(expr, "^lsl%s*#(%d+)$")
+  n = tonumber(n)
+  if not n then werror("expected shift operand") end
+  if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
+    werror("bad shift amount")
+  end
+  return shl(n, 17)
+end
+
+local function parse_extend(expr)
+  local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+  if s == "lsl" then
+    s = parse_reg_type == "x" and 3 or 2
+  else
+    s = map_extend[s]
+  end
+  if not s then werror("expected extend operand") end
+  return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
+end
+
+local function parse_cond(expr, inv)
+  local c = map_cond[expr]
+  if not c then werror("expected condition operand") end
+  return shl(bit.bxor(c, inv), 12)
+end
+
+local function parse_load(params, nparams, n, op)
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then
+	  waction("IMML", 0, format(tp.ctypefmt, tailr))
+	  return op + base
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  local scale = shr(op, 30)
+  if p2 then
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+  elseif wb == "!" then
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if not p1a then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+  else
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + parse_reg_base(p1a)
+    if p2a ~= "" then
+      local imm = match(p2a, "^,%s*#(.*)$")
+      if imm then
+	op = op + parse_imm_load(imm, scale)
+      else
+	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+	  werror("bad index register type")
+	end
+	if p3b == "" then
+	  if parse_reg_type ~= "x" then werror("bad index register type") end
+	  op = op + 0x6000
+	else
+	  if p3s == "" or p3s == "#0" then
+	  elseif p3s == "#"..scale then
+	    op = op + 0x1000
+	  else
+	    werror("bad scale")
+	  end
+	  if parse_reg_type == "x" then
+	    if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+	    elseif p3b == "sxtx" then op = op + 0xe000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  else
+	    if p3b == "uxtw" then op = op + 0x4000
+	    elseif p3b == "sxtw" then op = op + 0xc000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  end
+	end
+      end
+    else
+      if wb == "!" then werror("bad use of '!'") end
+      op = op + 0x01000000
+    end
+  end
+  return op
+end
+
+local function parse_load_pair(params, nparams, n, op)
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local scale = shr(op, 30) == 0 and 2 or 3
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then
+	  waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
+	  return op + base + 0x01000000
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  if p2 then
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + 0x00800000
+  else
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
+    op = op + (wb == "!" and 0x01800000 or 0x01000000)
+  end
+  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
+end
+
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+local function branch_type(op)
+  if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
+  elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
+	 band(op, 0x3b000000) == 0x18000000 then
+    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
+  elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
+  elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
+  elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
+  else
+    assert(false, "unknown branch type")
+  end
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
+local function alias_bfx(p)
+  p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
+end
+
+local function alias_bfiz(p)
+  parse_reg(p[1])
+  if parse_reg_type == "w" then
+    p[3] = "#-("..p[3]:sub(2)..")%32"
+    p[4] = "#("..p[4]:sub(2)..")-1"
+  else
+    p[3] = "#-("..p[3]:sub(2)..")%64"
+    p[4] = "#("..p[4]:sub(2)..")-1"
+  end
+end
+
+local alias_lslimm = op_alias("ubfm_4", function(p)
+  parse_reg(p[1])
+  local sh = p[3]:sub(2)
+  if parse_reg_type == "w" then
+    p[3] = "#-("..sh..")%32"
+    p[4] = "#31-("..sh..")"
+  else
+    p[3] = "#-("..sh..")%64"
+    p[4] = "#63-("..sh..")"
+  end
+end)
+
+-- Template strings for ARM instructions.
+map_op = {
+  -- Basic data processing instructions.
+  add_3  = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
+  add_4  = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
+  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
+  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
+  cmn_2  = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
+  cmn_3  = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
+
+  sub_3  = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
+  sub_4  = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
+  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
+  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
+  cmp_2  = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
+  cmp_3  = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
+
+  neg_2  = "4b0003e0DMg",
+  neg_3  = "4b0003e0DMSg",
+  negs_2 = "6b0003e0DMg",
+  negs_3 = "6b0003e0DMSg",
+
+  adc_3  = "1a000000DNMg",
+  adcs_3 = "3a000000DNMg",
+  sbc_3  = "5a000000DNMg",
+  sbcs_3 = "7a000000DNMg",
+  ngc_2  = "5a0003e0DMg",
+  ngcs_2 = "7a0003e0DMg",
+
+  and_3  = "0a000000DNMg|12000000pDNig",
+  and_4  = "0a000000DNMSg",
+  orr_3  = "2a000000DNMg|32000000pDNig",
+  orr_4  = "2a000000DNMSg",
+  eor_3  = "4a000000DNMg|52000000pDNig",
+  eor_4  = "4a000000DNMSg",
+  ands_3 = "6a000000DNMg|72000000DNig",
+  ands_4 = "6a000000DNMSg",
+  tst_2  = "6a00001fNMg|7200001fNig",
+  tst_3  = "6a00001fNMSg",
+
+  bic_3  = "0a200000DNMg",
+  bic_4  = "0a200000DNMSg",
+  orn_3  = "2a200000DNMg",
+  orn_4  = "2a200000DNMSg",
+  eon_3  = "4a200000DNMg",
+  eon_4  = "4a200000DNMSg",
+  bics_3 = "6a200000DNMg",
+  bics_4 = "6a200000DNMSg",
+
+  movn_2 = "12800000DWg",
+  movn_3 = "12800000DWRg",
+  movz_2 = "52800000DWg",
+  movz_3 = "52800000DWRg",
+  movk_2 = "72800000DWg",
+  movk_3 = "72800000DWRg",
+
+  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+  mov_2  = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+  mov_3  = "2a0003e0DMSg",
+  mvn_2  = "2a2003e0DMg",
+  mvn_3  = "2a2003e0DMSg",
+
+  adr_2  = "10000000DBx",
+  adrp_2 = "90000000DBx",
+
+  csel_4  = "1a800000DNMCg",
+  csinc_4 = "1a800400DNMCg",
+  csinv_4 = "5a800000DNMCg",
+  csneg_4 = "5a800400DNMCg",
+  cset_2  = "1a9f07e0Dcg",
+  csetm_2 = "5a9f03e0Dcg",
+  cinc_3  = "1a800400DNmcg",
+  cinv_3  = "5a800000DNmcg",
+  cneg_3  = "5a800400DNmcg",
+
+  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+
+  madd_4 = "1b000000DNMAg",
+  msub_4 = "1b008000DNMAg",
+  mul_3  = "1b007c00DNMg",
+  mneg_3 = "1b00fc00DNMg",
+
+  smaddl_4 = "9b200000DxNMwAx",
+  smsubl_4 = "9b208000DxNMwAx",
+  smull_3  = "9b207c00DxNMw",
+  smnegl_3 = "9b20fc00DxNMw",
+  smulh_3  = "9b407c00DNMx",
+  umaddl_4 = "9ba00000DxNMwAx",
+  umsubl_4 = "9ba08000DxNMwAx",
+  umull_3  = "9ba07c00DxNMw",
+  umnegl_3 = "9ba0fc00DxNMw",
+  umulh_3  = "9bc07c00DNMx",
+
+  udiv_3 = "1ac00800DNMg",
+  sdiv_3 = "1ac00c00DNMg",
+
+  -- Bit operations.
+  sbfm_4 = "13000000DN12w|93400000DN12x",
+  bfm_4  = "33000000DN12w|b3400000DN12x",
+  ubfm_4 = "53000000DN12w|d3400000DN12x",
+  extr_4 = "13800000DNM2w|93c00000DNM2x",
+
+  sxtb_2 = "13001c00DNw|93401c00DNx",
+  sxth_2 = "13003c00DNw|93403c00DNx",
+  sxtw_2 = "93407c00DxNw",
+  uxtb_2 = "53001c00DNw",
+  uxth_2 = "53003c00DNw",
+
+  sbfx_4  = op_alias("sbfm_4", alias_bfx),
+  bfxil_4 = op_alias("bfm_4", alias_bfx),
+  ubfx_4  = op_alias("ubfm_4", alias_bfx),
+  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+  bfi_4   = op_alias("bfm_4", alias_bfiz),
+  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+
+  lsl_3  = function(params, nparams)
+    if params and params[3]:byte() == 35 then
+      return alias_lslimm(params, nparams)
+    else
+      return op_template(params, "1ac02000DNMg", nparams)
+    end
+  end,
+  lsr_3  = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+  asr_3  = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+  ror_3  = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+
+  clz_2   = "5ac01000DNg",
+  cls_2   = "5ac01400DNg",
+  rbit_2  = "5ac00000DNg",
+  rev_2   = "5ac00800DNw|dac00c00DNx",
+  rev16_2 = "5ac00400DNg",
+  rev32_2 = "dac00800DNx",
+
+  -- Loads and stores.
+  ["strb_*"]  = "38000000DwL",
+  ["ldrb_*"]  = "38400000DwL",
+  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+  ["strh_*"]  = "78000000DwL",
+  ["ldrh_*"]  = "78400000DwL",
+  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+  -- NOTE: ldur etc. are handled by ldr et al.
+
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["ldpsw_*"] = "68400000DAxP",
+
+  -- Branches.
+  b_1    = "14000000B",
+  bl_1   = "94000000B",
+  blr_1  = "d63f0000Nx",
+  br_1   = "d61f0000Nx",
+  ret_0  = "d65f03c0",
+  ret_1  = "d65f0000Nx",
+  -- b.cond is added below.
+  cbz_2  = "34000000DBg",
+  cbnz_2 = "35000000DBg",
+  tbz_3  = "36000000DTBw|36000000DTBx",
+  tbnz_3 = "37000000DTBw|37000000DTBx",
+
+  -- Miscellaneous instructions.
+  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+  -- TODO: sys, sysl, ic, dc, at, tlbi
+  -- TODO: hint, yield, wfe, wfi, sev, sevl
+  -- TODO: clrex, dsb, dmb, isb
+  nop_0  = "d503201f",
+  brk_0  = "d4200000",
+  brk_1  = "d4200000W",
+
+  -- Floating point instructions.
+  fmov_2  = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+  fabs_2  = "1e20c000DNf",
+  fneg_2  = "1e214000DNf",
+  fsqrt_2 = "1e21c000DNf",
+
+  fcvt_2  = "1e22c000DdNs|1e624000DsNd",
+
+  -- TODO: half-precision and fixed-point conversions.
+  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+
+  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+
+  frintn_2 = "1e244000DNf",
+  frintp_2 = "1e24c000DNf",
+  frintm_2 = "1e254000DNf",
+  frintz_2 = "1e25c000DNf",
+  frinta_2 = "1e264000DNf",
+  frintx_2 = "1e274000DNf",
+  frinti_2 = "1e27c000DNf",
+
+  fadd_3   = "1e202800DNMf",
+  fsub_3   = "1e203800DNMf",
+  fmul_3   = "1e200800DNMf",
+  fnmul_3  = "1e208800DNMf",
+  fdiv_3   = "1e201800DNMf",
+
+  fmadd_4  = "1f000000DNMAf",
+  fmsub_4  = "1f008000DNMAf",
+  fnmadd_4 = "1f200000DNMAf",
+  fnmsub_4 = "1f208000DNMAf",
+
+  fmax_3   = "1e204800DNMf",
+  fmaxnm_3 = "1e206800DNMf",
+  fmin_3   = "1e205800DNMf",
+  fminnm_3 = "1e207800DNMf",
+
+  fcmp_2   = "1e202000NMf|1e202008NZf",
+  fcmpe_2  = "1e202010NMf|1e202018NZf",
+
+  fccmp_4  = "1e200400NMVCf",
+  fccmpe_4 = "1e200410NMVCf",
+
+  fcsel_4  = "1e200c00DNMCf",
+
+  -- TODO: crc32*, aes*, sha*, pmull
+  -- TODO: SIMD instructions.
+}
+
+for cond,c in pairs(map_cond) do
+  map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+local function parse_template(params, template, nparams, pos)
+  local op = tonumber(sub(template, 1, 8), 16)
+  local n = 1
+  local rtt = {}
+
+  parse_reg_type = false
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    local q = params[n]
+    if p == "D" then
+      op = op + parse_reg(q); n = n + 1
+    elseif p == "N" then
+      op = op + shl(parse_reg(q), 5); n = n + 1
+    elseif p == "M" then
+      op = op + shl(parse_reg(q), 16); n = n + 1
+    elseif p == "A" then
+      op = op + shl(parse_reg(q), 10); n = n + 1
+    elseif p == "m" then
+      op = op + shl(parse_reg(params[n-1]), 16)
+
+    elseif p == "p" then
+      if q == "sp" then params[n] = "@x31" end
+    elseif p == "g" then
+      if parse_reg_type == "x" then
+	op = op + 0x80000000
+      elseif parse_reg_type ~= "w" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "f" then
+      if parse_reg_type == "d" then
+	op = op + 0x00400000
+      elseif parse_reg_type ~= "s" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "x" or p == "w" or p == "d" or p == "s" then
+      if parse_reg_type ~= p then
+	werror("register size mismatch")
+      end
+      parse_reg_type = false
+
+    elseif p == "L" then
+      op = parse_load(params, nparams, n, op)
+    elseif p == "P" then
+      op = parse_load_pair(params, nparams, n, op)
+
+    elseif p == "B" then
+      local mode, v, s = parse_label(q, false); n = n + 1
+      local m = branch_type(op)
+      waction("REL_"..mode, v+m, s, 1)
+
+    elseif p == "I" then
+      op = op + parse_imm12(q); n = n + 1
+    elseif p == "i" then
+      op = op + parse_imm13(q); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+    elseif p == "T" then
+      op = op + parse_imm6(q); n = n + 1
+    elseif p == "1" then
+      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+    elseif p == "2" then
+      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+    elseif p == "5" then
+      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+    elseif p == "V" then
+      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+    elseif p == "F" then
+      op = op + parse_fpimm(q); n = n + 1
+    elseif p == "Z" then
+      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
+      n = n + 1
+
+    elseif p == "S" then
+      op = op + parse_shift(q); n = n + 1
+    elseif p == "X" then
+      op = op + parse_extend(q); n = n + 1
+    elseif p == "R" then
+      op = op + parse_lslx16(q); n = n + 1
+    elseif p == "C" then
+      op = op + parse_cond(q, 0); n = n + 1
+    elseif p == "c" then
+      op = op + parse_cond(q, 1); n = n + 1
+
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+function op_template(params, template, nparams)
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions.
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+  local lpos, apos, spos = #actlist, #actargs, secpos
+
+  local ok, err
+  for t in gmatch(template, "[^|]+") do
+    ok, err = pcall(parse_template, params, t, nparams, pos)
+    if ok then return end
+    secpos = spos
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
+    actargs[apos+1] = nil
+    actargs[apos+2] = nil
+    actargs[apos+3] = nil
+  end
+  error(err, 0)
+end
+
+map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 3 - 2
jni/LuaJIT-2.0.1/dynasm/dasm_mips.h → jni/LuaJIT-2.1/dynasm/dasm_mips.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM MIPS encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -202,7 +202,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
-	if (n >= 0) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl += 10; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;

+ 4 - 4
jni/LuaJIT-2.0.1/dynasm/dasm_mips.lua → jni/LuaJIT-2.1/dynasm/dasm_mips.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM MIPS module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -9,9 +9,9 @@
 local _info = {
   arch =	"mips",
   description =	"DynASM MIPS module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2012-01-23",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   license =	"MIT",
 }

+ 13 - 5
jni/LuaJIT-2.0.1/dynasm/dasm_ppc.h → jni/LuaJIT-2.1/dynasm/dasm_ppc.h

@@ -1,6 +1,6 @@
 /*
-** DynASM PPC encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** DynASM PPC/PPC64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
   DASM__MAX
 };
 
@@ -202,7 +202,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
-	if (n >= 0) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl += 10; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;
@@ -243,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
 #endif
 	b[pos++] = n;
 	break;
+      case DASM_IMMSH:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
       }
     }
   }
@@ -298,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMSH: pos++; break;
 	}
       }
       stop: (void)0;
@@ -365,6 +370,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;
+	case DASM_IMMSH:
+	  cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
+	  break;
 	default: *cp++ = ins; break;
 	}
       }

+ 687 - 17
jni/LuaJIT-2.0.1/dynasm/dasm_ppc.lua → jni/LuaJIT-2.1/dynasm/dasm_ppc.lua

@@ -1,17 +1,19 @@
 ------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
 ------------------------------------------------------------------------------
 
 -- Module information:
 local _info = {
   arch =	"ppc",
   description =	"DynASM PPC module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMSH"
 }
 
 -- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
 
 ------------------------------------------------------------------------------
 
+local map_op, op_template
+
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
 -- Template strings for PPC instructions.
-local map_op = {
+map_op = {
   tdi_3 =	"08000000ARI",
   twi_3 =	"0c000000ARI",
   mulli_3 =	"1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
   std_2 =	"f8000000RD",
   stdu_2 =	"f8000001RD",
 
+  subi_3 =	op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
+  subis_3 =	op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
+  subic_3 =	op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
+  ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
+
+  rotlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  rotrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
+  end),
+  rotlw_3 =	op_alias("rlwnm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  slwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[4] = "0"
+  end),
+  srwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
+  end),
+  clrlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "0"; p[5] = "31"
+  end),
+  clrrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
+  end),
+
+  -- Primary opcode 4:
+  mulhhwu_3 =		"10000010RRR.",
+  machhwu_3 =		"10000018RRR.",
+  mulhhw_3 =		"10000050RRR.",
+  nmachhw_3 =		"1000005cRRR.",
+  machhwsu_3 =		"10000098RRR.",
+  machhws_3 =		"100000d8RRR.",
+  nmachhws_3 =		"100000dcRRR.",
+  mulchwu_3 =		"10000110RRR.",
+  macchwu_3 =		"10000118RRR.",
+  mulchw_3 =		"10000150RRR.",
+  macchw_3 =		"10000158RRR.",
+  nmacchw_3 =		"1000015cRRR.",
+  macchwsu_3 =		"10000198RRR.",
+  macchws_3 =		"100001d8RRR.",
+  nmacchws_3 =		"100001dcRRR.",
+  mullhw_3 =		"10000350RRR.",
+  maclhw_3 =		"10000358RRR.",
+  nmaclhw_3 =		"1000035cRRR.",
+  maclhwsu_3 =		"10000398RRR.",
+  maclhws_3 =		"100003d8RRR.",
+  nmaclhws_3 =		"100003dcRRR.",
+  machhwuo_3 =		"10000418RRR.",
+  nmachhwo_3 =		"1000045cRRR.",
+  machhwsuo_3 =		"10000498RRR.",
+  machhwso_3 =		"100004d8RRR.",
+  nmachhwso_3 =		"100004dcRRR.",
+  macchwuo_3 =		"10000518RRR.",
+  macchwo_3 =		"10000558RRR.",
+  nmacchwo_3 =		"1000055cRRR.",
+  macchwsuo_3 =		"10000598RRR.",
+  macchwso_3 =		"100005d8RRR.",
+  nmacchwso_3 =		"100005dcRRR.",
+  maclhwo_3 =		"10000758RRR.",
+  nmaclhwo_3 =		"1000075cRRR.",
+  maclhwsuo_3 =		"10000798RRR.",
+  maclhwso_3 =		"100007d8RRR.",
+  nmaclhwso_3 =		"100007dcRRR.",
+
+  vaddubm_3 =		"10000000VVV",
+  vmaxub_3 =		"10000002VVV",
+  vrlb_3 =		"10000004VVV",
+  vcmpequb_3 =		"10000006VVV",
+  vmuloub_3 =		"10000008VVV",
+  vaddfp_3 =		"1000000aVVV",
+  vmrghb_3 =		"1000000cVVV",
+  vpkuhum_3 =		"1000000eVVV",
+  vmhaddshs_4 =		"10000020VVVV",
+  vmhraddshs_4 =	"10000021VVVV",
+  vmladduhm_4 =		"10000022VVVV",
+  vmsumubm_4 =		"10000024VVVV",
+  vmsummbm_4 =		"10000025VVVV",
+  vmsumuhm_4 =		"10000026VVVV",
+  vmsumuhs_4 =		"10000027VVVV",
+  vmsumshm_4 =		"10000028VVVV",
+  vmsumshs_4 =		"10000029VVVV",
+  vsel_4 =		"1000002aVVVV",
+  vperm_4 =		"1000002bVVVV",
+  vsldoi_4 =		"1000002cVVVP",
+  vpermxor_4 =		"1000002dVVVV",
+  vmaddfp_4 =		"1000002eVVVV~",
+  vnmsubfp_4 =		"1000002fVVVV~",
+  vaddeuqm_4 =		"1000003cVVVV",
+  vaddecuq_4 =		"1000003dVVVV",
+  vsubeuqm_4 =		"1000003eVVVV",
+  vsubecuq_4 =		"1000003fVVVV",
+  vadduhm_3 =		"10000040VVV",
+  vmaxuh_3 =		"10000042VVV",
+  vrlh_3 =		"10000044VVV",
+  vcmpequh_3 =		"10000046VVV",
+  vmulouh_3 =		"10000048VVV",
+  vsubfp_3 =		"1000004aVVV",
+  vmrghh_3 =		"1000004cVVV",
+  vpkuwum_3 =		"1000004eVVV",
+  vadduwm_3 =		"10000080VVV",
+  vmaxuw_3 =		"10000082VVV",
+  vrlw_3 =		"10000084VVV",
+  vcmpequw_3 =		"10000086VVV",
+  vmulouw_3 =		"10000088VVV",
+  vmuluwm_3 =		"10000089VVV",
+  vmrghw_3 =		"1000008cVVV",
+  vpkuhus_3 =		"1000008eVVV",
+  vaddudm_3 =		"100000c0VVV",
+  vmaxud_3 =		"100000c2VVV",
+  vrld_3 =		"100000c4VVV",
+  vcmpeqfp_3 =		"100000c6VVV",
+  vcmpequd_3 =		"100000c7VVV",
+  vpkuwus_3 =		"100000ceVVV",
+  vadduqm_3 =		"10000100VVV",
+  vmaxsb_3 =		"10000102VVV",
+  vslb_3 =		"10000104VVV",
+  vmulosb_3 =		"10000108VVV",
+  vrefp_2 =		"1000010aV-V",
+  vmrglb_3 =		"1000010cVVV",
+  vpkshus_3 =		"1000010eVVV",
+  vaddcuq_3 =		"10000140VVV",
+  vmaxsh_3 =		"10000142VVV",
+  vslh_3 =		"10000144VVV",
+  vmulosh_3 =		"10000148VVV",
+  vrsqrtefp_2 =		"1000014aV-V",
+  vmrglh_3 =		"1000014cVVV",
+  vpkswus_3 =		"1000014eVVV",
+  vaddcuw_3 =		"10000180VVV",
+  vmaxsw_3 =		"10000182VVV",
+  vslw_3 =		"10000184VVV",
+  vmulosw_3 =		"10000188VVV",
+  vexptefp_2 =		"1000018aV-V",
+  vmrglw_3 =		"1000018cVVV",
+  vpkshss_3 =		"1000018eVVV",
+  vmaxsd_3 =		"100001c2VVV",
+  vsl_3 =		"100001c4VVV",
+  vcmpgefp_3 =		"100001c6VVV",
+  vlogefp_2 =		"100001caV-V",
+  vpkswss_3 =		"100001ceVVV",
+  vadduhs_3 =		"10000240VVV",
+  vminuh_3 =		"10000242VVV",
+  vsrh_3 =		"10000244VVV",
+  vcmpgtuh_3 =		"10000246VVV",
+  vmuleuh_3 =		"10000248VVV",
+  vrfiz_2 =		"1000024aV-V",
+  vsplth_3 =		"1000024cVV3",
+  vupkhsh_2 =		"1000024eV-V",
+  vminuw_3 =		"10000282VVV",
+  vminud_3 =		"100002c2VVV",
+  vcmpgtud_3 =		"100002c7VVV",
+  vrfim_2 =		"100002caV-V",
+  vcmpgtsb_3 =		"10000306VVV",
+  vcfux_3 =		"1000030aVVA~",
+  vaddshs_3 =		"10000340VVV",
+  vminsh_3 =		"10000342VVV",
+  vsrah_3 =		"10000344VVV",
+  vcmpgtsh_3 =		"10000346VVV",
+  vmulesh_3 =		"10000348VVV",
+  vcfsx_3 =		"1000034aVVA~",
+  vspltish_2 =		"1000034cVS",
+  vupkhpx_2 =		"1000034eV-V",
+  vaddsws_3 =		"10000380VVV",
+  vminsw_3 =		"10000382VVV",
+  vsraw_3 =		"10000384VVV",
+  vcmpgtsw_3 =		"10000386VVV",
+  vmulesw_3 =		"10000388VVV",
+  vctuxs_3 =		"1000038aVVA~",
+  vspltisw_2 =		"1000038cVS",
+  vminsd_3 =		"100003c2VVV",
+  vsrad_3 =		"100003c4VVV",
+  vcmpbfp_3 =		"100003c6VVV",
+  vcmpgtsd_3 =		"100003c7VVV",
+  vctsxs_3 =		"100003caVVA~",
+  vupklpx_2 =		"100003ceV-V",
+  vsububm_3 =		"10000400VVV",
+  ["bcdadd._4"] =	"10000401VVVy.",
+  vavgub_3 =		"10000402VVV",
+  vand_3 =		"10000404VVV",
+  ["vcmpequb._3"] =	"10000406VVV",
+  vmaxfp_3 =		"1000040aVVV",
+  vsubuhm_3 =		"10000440VVV",
+  ["bcdsub._4"] =	"10000441VVVy.",
+  vavguh_3 =		"10000442VVV",
+  vandc_3 =		"10000444VVV",
+  ["vcmpequh._3"] =	"10000446VVV",
+  vminfp_3 =		"1000044aVVV",
+  vpkudum_3 =		"1000044eVVV",
+  vsubuwm_3 =		"10000480VVV",
+  vavguw_3 =		"10000482VVV",
+  vor_3 =		"10000484VVV",
+  ["vcmpequw._3"] =	"10000486VVV",
+  vpmsumw_3 =		"10000488VVV",
+  ["vcmpeqfp._3"] =	"100004c6VVV",
+  ["vcmpequd._3"] =	"100004c7VVV",
+  vpkudus_3 =		"100004ceVVV",
+  vavgsb_3 =		"10000502VVV",
+  vavgsh_3 =		"10000542VVV",
+  vorc_3 =		"10000544VVV",
+  vbpermq_3 =		"1000054cVVV",
+  vpksdus_3 =		"1000054eVVV",
+  vavgsw_3 =		"10000582VVV",
+  vsld_3 =		"100005c4VVV",
+  ["vcmpgefp._3"] =	"100005c6VVV",
+  vpksdss_3 =		"100005ceVVV",
+  vsububs_3 =		"10000600VVV",
+  mfvscr_1 =		"10000604V--",
+  vsum4ubs_3 =		"10000608VVV",
+  vsubuhs_3 =		"10000640VVV",
+  mtvscr_1 =		"10000644--V",
+  ["vcmpgtuh._3"] =	"10000646VVV",
+  vsum4shs_3 =		"10000648VVV",
+  vupkhsw_2 =		"1000064eV-V",
+  vsubuws_3 =		"10000680VVV",
+  vshasigmaw_4 =	"10000682VVYp",
+  veqv_3 =		"10000684VVV",
+  vsum2sws_3 =		"10000688VVV",
+  vmrgow_3 =		"1000068cVVV",
+  vshasigmad_4 =	"100006c2VVYp",
+  vsrd_3 =		"100006c4VVV",
+  ["vcmpgtud._3"] =	"100006c7VVV",
+  vupklsw_2 =		"100006ceV-V",
+  vupkslw_2 =		"100006ceV-V",
+  vsubsbs_3 =		"10000700VVV",
+  vclzb_2 =		"10000702V-V",
+  vpopcntb_2 =		"10000703V-V",
+  ["vcmpgtsb._3"] =	"10000706VVV",
+  vsum4sbs_3 =		"10000708VVV",
+  vsubshs_3 =		"10000740VVV",
+  vclzh_2 =		"10000742V-V",
+  vpopcnth_2 =		"10000743V-V",
+  ["vcmpgtsh._3"] =	"10000746VVV",
+  vsubsws_3 =		"10000780VVV",
+  vclzw_2 =		"10000782V-V",
+  vpopcntw_2 =		"10000783V-V",
+  ["vcmpgtsw._3"] =	"10000786VVV",
+  vsumsws_3 =		"10000788VVV",
+  vmrgew_3 =		"1000078cVVV",
+  vclzd_2 =		"100007c2V-V",
+  vpopcntd_2 =		"100007c3V-V",
+  ["vcmpbfp._3"] =	"100007c6VVV",
+  ["vcmpgtsd._3"] =	"100007c7VVV",
+
   -- Primary opcode 19:
   mcrf_2 =	"4c000000XX",
   isync_0 =	"4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
   bclrl_2 =	"4c000021AA",
   bcctr_2 =	"4c000420AA",
   bcctrl_2 =	"4c000421AA",
+  bctar_2 =	"4c000460AA",
+  bctarl_2 =	"4c000461AA",
   blr_0 =	"4e800020",
   blrl_0 =	"4e800021",
   bctr_0 =	"4e800420",
@@ -327,6 +585,7 @@ local map_op = {
   cmpd_3 =	"7c200000XRR",
   cmpd_2 =	"7c200000-RR",
   tw_3 =	"7c000008ARR",
+  lvsl_3 =	"7c00000cVRR",
   subfc_3 =	"7c000010RRR.",
   subc_3 =	"7c000010RRR~.",
   mulhdu_3 =	"7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
   cmplw_2 =	"7c000040-RR",
   cmpld_3 =	"7c200040XRR",
   cmpld_2 =	"7c200040-RR",
+  lvsr_3 =	"7c00004cVRR",
   subf_3 =	"7c000050RRR.",
   sub_3 =	"7c000050RRR~.",
+  lbarx_3 =	"7c000068RR0R",
   ldux_3 =	"7c00006aRR0R",
   dcbst_2 =	"7c00006c-RR",
   lwzux_3 =	"7c00006eRR0R",
   cntlzd_2 =	"7c000074RR~",
   andc_3 =	"7c000078RR~R.",
   td_3 =	"7c000088ARR",
+  lvewx_3 =	"7c00008eVRR",
   mulhd_3 =	"7c000092RRR.",
+  addg6s_3 =	"7c000094RRR",
   mulhw_3 =	"7c000096RRR.",
+  dlmzb_3 =	"7c00009cRR~R.",
   ldarx_3 =	"7c0000a8RR0R",
   dcbf_2 =	"7c0000ac-RR",
   lbzx_3 =	"7c0000aeRR0R",
+  lvx_3 =	"7c0000ceVRR",
   neg_2 =	"7c0000d0RR.",
+  lharx_3 =	"7c0000e8RR0R",
   lbzux_3 =	"7c0000eeRR0R",
   popcntb_2 =	"7c0000f4RR~",
   not_2 =	"7c0000f8RR~%.",
   nor_3 =	"7c0000f8RR~R.",
+  stvebx_3 =	"7c00010eVRR",
   subfe_3 =	"7c000110RRR.",
   sube_3 =	"7c000110RRR~.",
   adde_3 =	"7c000114RRR.",
   stdx_3 =	"7c00012aRR0R",
-  stwcx_3 =	"7c00012cRR0R.",
+  ["stwcx._3"] =	"7c00012dRR0R.",
   stwx_3 =	"7c00012eRR0R",
   prtyw_2 =	"7c000134RR~",
+  stvehx_3 =	"7c00014eVRR",
   stdux_3 =	"7c00016aRR0R",
+  ["stqcx._3"] =	"7c00016dR:R0R.",
   stwux_3 =	"7c00016eRR0R",
   prtyd_2 =	"7c000174RR~",
+  stvewx_3 =	"7c00018eVRR",
   subfze_2 =	"7c000190RR.",
   addze_2 =	"7c000194RR.",
-  stdcx_3 =	"7c0001acRR0R.",
+  ["stdcx._3"] =	"7c0001adRR0R.",
   stbx_3 =	"7c0001aeRR0R",
+  stvx_3 =	"7c0001ceVRR",
   subfme_2 =	"7c0001d0RR.",
   mulld_3 =	"7c0001d2RRR.",
   addme_2 =	"7c0001d4RR.",
   mullw_3 =	"7c0001d6RRR.",
   dcbtst_2 =	"7c0001ec-RR",
   stbux_3 =	"7c0001eeRR0R",
+  bpermd_3 =	"7c0001f8RR~R",
+  lvepxl_3 =	"7c00020eVRR",
   add_3 =	"7c000214RRR.",
+  lqarx_3 =	"7c000228R:R0R",
   dcbt_2 =	"7c00022c-RR",
   lhzx_3 =	"7c00022eRR0R",
+  cdtbcd_2 =	"7c000234RR~",
   eqv_3 =	"7c000238RR~R.",
+  lvepx_3 =	"7c00024eVRR",
   eciwx_3 =	"7c00026cRR0R",
   lhzux_3 =	"7c00026eRR0R",
+  cbcdtd_2 =	"7c000274RR~",
   xor_3 =	"7c000278RR~R.",
   mfspefscr_1 =	"7c0082a6R",
   mfxer_1 =	"7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
   lhax_3 =	"7c0002aeRR0R",
   mftb_1 =	"7c0c42e6R",
   mftbu_1 =	"7c0d42e6R",
+  lvxl_3 =	"7c0002ceVRR",
   lwaux_3 =	"7c0002eaRR0R",
   lhaux_3 =	"7c0002eeRR0R",
+  popcntw_2 =	"7c0002f4RR~",
+  divdeu_3 =	"7c000312RRR.",
+  divweu_3 =	"7c000316RRR.",
   sthx_3 =	"7c00032eRR0R",
   orc_3 =	"7c000338RR~R.",
   ecowx_3 =	"7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
   mtctr_1 =	"7c0903a6R",
   dcbi_2 =	"7c0003ac-RR",
   nand_3 =	"7c0003b8RR~R.",
+  dsn_2 =	"7c0003c6-RR",
+  stvxl_3 =	"7c0003ceVRR",
   divd_3 =	"7c0003d2RRR.",
   divw_3 =	"7c0003d6RRR.",
+  popcntd_2 =	"7c0003f4RR~",
   cmpb_3 =	"7c0003f8RR~R.",
   mcrxr_1 =	"7c000400X",
+  lbdx_3 =	"7c000406RRR",
   subfco_3 =	"7c000410RRR.",
   subco_3 =	"7c000410RRR~.",
   addco_3 =	"7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
   lfsx_3 =	"7c00042eFR0R",
   srw_3 =	"7c000430RR~R.",
   srd_3 =	"7c000436RR~R.",
+  lhdx_3 =	"7c000446RRR",
   subfo_3 =	"7c000450RRR.",
   subo_3 =	"7c000450RRR~.",
   lfsux_3 =	"7c00046eFR0R",
+  lwdx_3 =	"7c000486RRR",
   lswi_3 =	"7c0004aaRR0A",
   sync_0 =	"7c0004ac",
   lwsync_0 =	"7c2004ac",
   ptesync_0 =	"7c4004ac",
   lfdx_3 =	"7c0004aeFR0R",
+  lddx_3 =	"7c0004c6RRR",
   nego_2 =	"7c0004d0RR.",
   lfdux_3 =	"7c0004eeFR0R",
+  stbdx_3 =	"7c000506RRR",
   subfeo_3 =	"7c000510RRR.",
   subeo_3 =	"7c000510RRR~.",
   addeo_3 =	"7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
   stswx_3 =	"7c00052aRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stfsx_3 =	"7c00052eFR0R",
+  sthdx_3 =	"7c000546RRR",
+  ["stbcx._3"] =	"7c00056dRRR",
   stfsux_3 =	"7c00056eFR0R",
+  stwdx_3 =	"7c000586RRR",
   subfzeo_2 =	"7c000590RR.",
   addzeo_2 =	"7c000594RR.",
   stswi_3 =	"7c0005aaRR0A",
+  ["sthcx._3"] =	"7c0005adRRR",
   stfdx_3 =	"7c0005aeFR0R",
+  stddx_3 =	"7c0005c6RRR",
   subfmeo_2 =	"7c0005d0RR.",
   mulldo_3 =	"7c0005d2RRR.",
   addmeo_2 =	"7c0005d4RR.",
   mullwo_3 =	"7c0005d6RRR.",
   dcba_2 =	"7c0005ec-RR",
   stfdux_3 =	"7c0005eeFR0R",
+  stvepxl_3 =	"7c00060eVRR",
   addo_3 =	"7c000614RRR.",
   lhbrx_3 =	"7c00062cRR0R",
+  lfdpx_3 =	"7c00062eF:RR",
   sraw_3 =	"7c000630RR~R.",
   srad_3 =	"7c000634RR~R.",
+  lfddx_3 =	"7c000646FRR",
+  stvepx_3 =	"7c00064eVRR",
   srawi_3 =	"7c000670RR~A.",
   sradi_3 =	"7c000674RR~H.",
   eieio_0 =	"7c0006ac",
   lfiwax_3 =	"7c0006aeFR0R",
+  divdeuo_3 =	"7c000712RRR.",
+  divweuo_3 =	"7c000716RRR.",
   sthbrx_3 =	"7c00072cRR0R",
+  stfdpx_3 =	"7c00072eF:RR",
   extsh_2 =	"7c000734RR~.",
+  stfddx_3 =	"7c000746FRR",
+  divdeo_3 =	"7c000752RRR.",
+  divweo_3 =	"7c000756RRR.",
   extsb_2 =	"7c000774RR~.",
   divduo_3 =	"7c000792RRR.",
   divwou_3 =	"7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
   divwo_3 =	"7c0007d6RRR.",
   dcbz_2 =	"7c0007ec-RR",
 
+  ["tbegin._1"] =	"7c00051d1",
+  ["tbegin._0"] =	"7c00051d",
+  ["tend._1"] =		"7c00055dY",
+  ["tend._0"] =		"7c00055d",
+  ["tendall._0"] =	"7e00055d",
+  tcheck_1 =		"7c00059cX",
+  ["tsr._1"] =		"7c0005dd1",
+  ["tsuspend._0"] =	"7c0005dd",
+  ["tresume._0"] =	"7c2005dd",
+  ["tabortwc._3"] =	"7c00061dARR",
+  ["tabortdc._3"] =	"7c00065dARR",
+  ["tabortwci._3"] =	"7c00069dARS",
+  ["tabortdci._3"] =	"7c0006ddARS",
+  ["tabort._1"] =	"7c00071d-R-",
+  ["treclaim._1"] =	"7c00075d-R",
+  ["trechkpt._0"] =	"7c0007dd",
+
+  lxsiwzx_3 =	"7c000018QRR",
+  lxsiwax_3 =	"7c000098QRR",
+  mfvsrd_2 =	"7c000066-Rq",
+  mfvsrwz_2 =	"7c0000e6-Rq",
+  stxsiwx_3 =	"7c000118QRR",
+  mtvsrd_2 =	"7c000166QR",
+  mtvsrwa_2 =	"7c0001a6QR",
+  lxvdsx_3 =	"7c000298QRR",
+  lxsspx_3 =	"7c000418QRR",
+  lxsdx_3 =	"7c000498QRR",
+  stxsspx_3 =	"7c000518QRR",
+  stxsdx_3 =	"7c000598QRR",
+  lxvw4x_3 =	"7c000618QRR",
+  lxvd2x_3 =	"7c000698QRR",
+  stxvw4x_3 =	"7c000718QRR",
+  stxvd2x_3 =	"7c000798QRR",
+
   -- Primary opcode 30:
   rldicl_4 =	"78000000RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
   rldcl_4 =	"78000010RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
 
+  rotldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = "0"
+  end),
+  rotrdi_3 =	op_alias("rldicl_4", function(p)
+    p[3] = "64-("..p[3]..")"; p[4] = "0"
+  end),
+  rotld_3 =	op_alias("rldcl_4", function(p)
+    p[4] = "0"
+  end),
+  sldi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"
+  end),
+  srdi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "64-("..p[3]..")"
+  end),
+  clrldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "0"
+  end),
+  clrrdi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"; p[3] = "0"
+  end),
+
+  -- Primary opcode 56:
+  lq_2 =	"e0000000R:D", -- NYI: displacement must be divisible by 8.
+
+  -- Primary opcode 57:
+  lfdp_2 =	"e4000000F:D", -- NYI: displacement must be divisible by 4.
+
   -- Primary opcode 59:
   fdivs_3 =	"ec000024FFF.",
   fsubs_3 =	"ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
   fmadds_4 =	"ec00003aFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
+  fcfids_2 =	"ec00069cF-F.",
+  fcfidus_2 =	"ec00079cF-F.",
+
+  dadd_3 =	"ec000004FFF.",
+  dqua_4 =	"ec000006FFFZ.",
+  dmul_3 =	"ec000044FFF.",
+  drrnd_4 =	"ec000046FFFZ.",
+  dscli_3 =	"ec000084FF6.",
+  dquai_4 =	"ec000086SF~FZ.",
+  dscri_3 =	"ec0000c4FF6.",
+  drintx_4 =	"ec0000c61F~FZ.",
+  dcmpo_3 =	"ec000104XFF",
+  dtstex_3 =	"ec000144XFF",
+  dtstdc_3 =	"ec000184XF6",
+  dtstdg_3 =	"ec0001c4XF6",
+  drintn_4 =	"ec0001c61F~FZ.",
+  dctdp_2 =	"ec000204F-F.",
+  dctfix_2 =	"ec000244F-F.",
+  ddedpd_3 =	"ec000284ZF~F.",
+  dxex_2 =	"ec0002c4F-F.",
+  dsub_3 =	"ec000404FFF.",
+  ddiv_3 =	"ec000444FFF.",
+  dcmpu_3 =	"ec000504XFF",
+  dtstsf_3 =	"ec000544XFF",
+  drsp_2 =	"ec000604F-F.",
+  dcffix_2 =	"ec000644F-F.",
+  denbcd_3 =	"ec000684YF~F.",
+  diex_3 =	"ec0006c4FFF.",
+
+  -- Primary opcode 60:
+  xsaddsp_3 =		"f0000000QQQ",
+  xsmaddasp_3 =		"f0000008QQQ",
+  xxsldwi_4 =		"f0000010QQQz",
+  xsrsqrtesp_2 =	"f0000028Q-Q",
+  xssqrtsp_2 =		"f000002cQ-Q",
+  xxsel_4 =		"f0000030QQQQ",
+  xssubsp_3 =		"f0000040QQQ",
+  xsmaddmsp_3 =		"f0000048QQQ",
+  xxpermdi_4 =		"f0000050QQQz",
+  xsresp_2 =		"f0000068Q-Q",
+  xsmulsp_3 =		"f0000080QQQ",
+  xsmsubasp_3 =		"f0000088QQQ",
+  xxmrghw_3 =		"f0000090QQQ",
+  xsdivsp_3 =		"f00000c0QQQ",
+  xsmsubmsp_3 =		"f00000c8QQQ",
+  xsadddp_3 =		"f0000100QQQ",
+  xsmaddadp_3 =		"f0000108QQQ",
+  xscmpudp_3 =		"f0000118XQQ",
+  xscvdpuxws_2 =	"f0000120Q-Q",
+  xsrdpi_2 =		"f0000124Q-Q",
+  xsrsqrtedp_2 =	"f0000128Q-Q",
+  xssqrtdp_2 =		"f000012cQ-Q",
+  xssubdp_3 =		"f0000140QQQ",
+  xsmaddmdp_3 =		"f0000148QQQ",
+  xscmpodp_3 =		"f0000158XQQ",
+  xscvdpsxws_2 =	"f0000160Q-Q",
+  xsrdpiz_2 =		"f0000164Q-Q",
+  xsredp_2 =		"f0000168Q-Q",
+  xsmuldp_3 =		"f0000180QQQ",
+  xsmsubadp_3 =		"f0000188QQQ",
+  xxmrglw_3 =		"f0000190QQQ",
+  xsrdpip_2 =		"f00001a4Q-Q",
+  xstsqrtdp_2 =		"f00001a8X-Q",
+  xsrdpic_2 =		"f00001acQ-Q",
+  xsdivdp_3 =		"f00001c0QQQ",
+  xsmsubmdp_3 =		"f00001c8QQQ",
+  xsrdpim_2 =		"f00001e4Q-Q",
+  xstdivdp_3 =		"f00001e8XQQ",
+  xvaddsp_3 =		"f0000200QQQ",
+  xvmaddasp_3 =		"f0000208QQQ",
+  xvcmpeqsp_3 =		"f0000218QQQ",
+  xvcvspuxws_2 =	"f0000220Q-Q",
+  xvrspi_2 =		"f0000224Q-Q",
+  xvrsqrtesp_2 =	"f0000228Q-Q",
+  xvsqrtsp_2 =		"f000022cQ-Q",
+  xvsubsp_3 =		"f0000240QQQ",
+  xvmaddmsp_3 =		"f0000248QQQ",
+  xvcmpgtsp_3 =		"f0000258QQQ",
+  xvcvspsxws_2 =	"f0000260Q-Q",
+  xvrspiz_2 =		"f0000264Q-Q",
+  xvresp_2 =		"f0000268Q-Q",
+  xvmulsp_3 =		"f0000280QQQ",
+  xvmsubasp_3 =		"f0000288QQQ",
+  xxspltw_3 =		"f0000290QQg~",
+  xvcmpgesp_3 =		"f0000298QQQ",
+  xvcvuxwsp_2 =		"f00002a0Q-Q",
+  xvrspip_2 =		"f00002a4Q-Q",
+  xvtsqrtsp_2 =		"f00002a8X-Q",
+  xvrspic_2 =		"f00002acQ-Q",
+  xvdivsp_3 =		"f00002c0QQQ",
+  xvmsubmsp_3 =		"f00002c8QQQ",
+  xvcvsxwsp_2 =		"f00002e0Q-Q",
+  xvrspim_2 =		"f00002e4Q-Q",
+  xvtdivsp_3 =		"f00002e8XQQ",
+  xvadddp_3 =		"f0000300QQQ",
+  xvmaddadp_3 =		"f0000308QQQ",
+  xvcmpeqdp_3 =		"f0000318QQQ",
+  xvcvdpuxws_2 =	"f0000320Q-Q",
+  xvrdpi_2 =		"f0000324Q-Q",
+  xvrsqrtedp_2 =	"f0000328Q-Q",
+  xvsqrtdp_2 =		"f000032cQ-Q",
+  xvsubdp_3 =		"f0000340QQQ",
+  xvmaddmdp_3 =		"f0000348QQQ",
+  xvcmpgtdp_3 =		"f0000358QQQ",
+  xvcvdpsxws_2 =	"f0000360Q-Q",
+  xvrdpiz_2 =		"f0000364Q-Q",
+  xvredp_2 =		"f0000368Q-Q",
+  xvmuldp_3 =		"f0000380QQQ",
+  xvmsubadp_3 =		"f0000388QQQ",
+  xvcmpgedp_3 =		"f0000398QQQ",
+  xvcvuxwdp_2 =		"f00003a0Q-Q",
+  xvrdpip_2 =		"f00003a4Q-Q",
+  xvtsqrtdp_2 =		"f00003a8X-Q",
+  xvrdpic_2 =		"f00003acQ-Q",
+  xvdivdp_3 =		"f00003c0QQQ",
+  xvmsubmdp_3 =		"f00003c8QQQ",
+  xvcvsxwdp_2 =		"f00003e0Q-Q",
+  xvrdpim_2 =		"f00003e4Q-Q",
+  xvtdivdp_3 =		"f00003e8XQQ",
+  xsnmaddasp_3 =	"f0000408QQQ",
+  xxland_3 =		"f0000410QQQ",
+  xscvdpsp_2 =		"f0000424Q-Q",
+  xscvdpspn_2 =		"f000042cQ-Q",
+  xsnmaddmsp_3 =	"f0000448QQQ",
+  xxlandc_3 =		"f0000450QQQ",
+  xsrsp_2 =		"f0000464Q-Q",
+  xsnmsubasp_3 =	"f0000488QQQ",
+  xxlor_3 =		"f0000490QQQ",
+  xscvuxdsp_2 =		"f00004a0Q-Q",
+  xsnmsubmsp_3 =	"f00004c8QQQ",
+  xxlxor_3 =		"f00004d0QQQ",
+  xscvsxdsp_2 =		"f00004e0Q-Q",
+  xsmaxdp_3 =		"f0000500QQQ",
+  xsnmaddadp_3 =	"f0000508QQQ",
+  xxlnor_3 =		"f0000510QQQ",
+  xscvdpuxds_2 =	"f0000520Q-Q",
+  xscvspdp_2 =		"f0000524Q-Q",
+  xscvspdpn_2 =		"f000052cQ-Q",
+  xsmindp_3 =		"f0000540QQQ",
+  xsnmaddmdp_3 =	"f0000548QQQ",
+  xxlorc_3 =		"f0000550QQQ",
+  xscvdpsxds_2 =	"f0000560Q-Q",
+  xsabsdp_2 =		"f0000564Q-Q",
+  xscpsgndp_3 =		"f0000580QQQ",
+  xsnmsubadp_3 =	"f0000588QQQ",
+  xxlnand_3 =		"f0000590QQQ",
+  xscvuxddp_2 =		"f00005a0Q-Q",
+  xsnabsdp_2 =		"f00005a4Q-Q",
+  xsnmsubmdp_3 =	"f00005c8QQQ",
+  xxleqv_3 =		"f00005d0QQQ",
+  xscvsxddp_2 =		"f00005e0Q-Q",
+  xsnegdp_2 =		"f00005e4Q-Q",
+  xvmaxsp_3 =		"f0000600QQQ",
+  xvnmaddasp_3 =	"f0000608QQQ",
+  ["xvcmpeqsp._3"] =	"f0000618QQQ",
+  xvcvspuxds_2 =	"f0000620Q-Q",
+  xvcvdpsp_2 =		"f0000624Q-Q",
+  xvminsp_3 =		"f0000640QQQ",
+  xvnmaddmsp_3 =	"f0000648QQQ",
+  ["xvcmpgtsp._3"] =	"f0000658QQQ",
+  xvcvspsxds_2 =	"f0000660Q-Q",
+  xvabssp_2 =		"f0000664Q-Q",
+  xvcpsgnsp_3 =		"f0000680QQQ",
+  xvnmsubasp_3 =	"f0000688QQQ",
+  ["xvcmpgesp._3"] =	"f0000698QQQ",
+  xvcvuxdsp_2 =		"f00006a0Q-Q",
+  xvnabssp_2 =		"f00006a4Q-Q",
+  xvnmsubmsp_3 =	"f00006c8QQQ",
+  xvcvsxdsp_2 =		"f00006e0Q-Q",
+  xvnegsp_2 =		"f00006e4Q-Q",
+  xvmaxdp_3 =		"f0000700QQQ",
+  xvnmaddadp_3 =	"f0000708QQQ",
+  ["xvcmpeqdp._3"] =	"f0000718QQQ",
+  xvcvdpuxds_2 =	"f0000720Q-Q",
+  xvcvspdp_2 =		"f0000724Q-Q",
+  xvmindp_3 =		"f0000740QQQ",
+  xvnmaddmdp_3 =	"f0000748QQQ",
+  ["xvcmpgtdp._3"] =	"f0000758QQQ",
+  xvcvdpsxds_2 =	"f0000760Q-Q",
+  xvabsdp_2 =		"f0000764Q-Q",
+  xvcpsgndp_3 =		"f0000780QQQ",
+  xvnmsubadp_3 =	"f0000788QQQ",
+  ["xvcmpgedp._3"] =	"f0000798QQQ",
+  xvcvuxddp_2 =		"f00007a0Q-Q",
+  xvnabsdp_2 =		"f00007a4Q-Q",
+  xvnmsubmdp_3 =	"f00007c8QQQ",
+  xvcvsxddp_2 =		"f00007e0Q-Q",
+  xvnegdp_2 =		"f00007e4Q-Q",
+
+  -- Primary opcode 61:
+  stfdp_2 =	"f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+  -- Primary opcode 62:
+  stq_2 =	"f8000002R:D", -- NYI: displacement must be divisible by 8.
 
   -- Primary opcode 63:
   fdiv_3 =	"fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
   frsp_2 =	"fc000018F-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiwz_2 =	"fc00001eF-F.",
+  ftdiv_2 =	"fc000100X-F.",
+  fctiwu_2 =	"fc00011cF-F.",
+  fctiwuz_2 =	"fc00011eF-F.",
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   fnabs_2 =	"fc000110F-F.",
+  ftsqrt_2 =	"fc000140X-F.",
   fabs_2 =	"fc000210F-F.",
   frin_2 =	"fc000310F-F.",
   friz_2 =	"fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   fctid_2 =	"fc00065cF-F.",
   fctidz_2 =	"fc00065eF-F.",
+  fmrgow_3 =	"fc00068cFFF",
   fcfid_2 =	"fc00069cF-F.",
+  fctidu_2 =	"fc00075cF-F.",
+  fctiduz_2 =	"fc00075eF-F.",
+  fmrgew_3 =	"fc00078cFFF",
+  fcfidu_2 =	"fc00079cF-F.",
+
+  daddq_3 =	"fc000004F:F:F:.",
+  dquaq_4 =	"fc000006F:F:F:Z.",
+  dmulq_3 =	"fc000044F:F:F:.",
+  drrndq_4 =	"fc000046F:F:F:Z.",
+  dscliq_3 =	"fc000084F:F:6.",
+  dquaiq_4 =	"fc000086SF:~F:Z.",
+  dscriq_3 =	"fc0000c4F:F:6.",
+  drintxq_4 =	"fc0000c61F:~F:Z.",
+  dcmpoq_3 =	"fc000104XF:F:",
+  dtstexq_3 =	"fc000144XF:F:",
+  dtstdcq_3 =	"fc000184XF:6",
+  dtstdgq_3 =	"fc0001c4XF:6",
+  drintnq_4 =	"fc0001c61F:~F:Z.",
+  dctqpq_2 =	"fc000204F:-F:.",
+  dctfixq_2 =	"fc000244F:-F:.",
+  ddedpdq_3 =	"fc000284ZF:~F:.",
+  dxexq_2 =	"fc0002c4F:-F:.",
+  dsubq_3 =	"fc000404F:F:F:.",
+  ddivq_3 =	"fc000444F:F:F:.",
+  dcmpuq_3 =	"fc000504XF:F:",
+  dtstsfq_3 =	"fc000544XF:F:",
+  drdpq_2 =	"fc000604F:-F:.",
+  dcffixq_2 =	"fc000644F:-F:.",
+  denbcdq_3 =	"fc000684YF:~F:.",
+  diexq_3 =	"fc0006c4F:FF:.",
 
   -- Primary opcode 4, SPE APU extension:
   evaddw_3 =		"10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
 do
   local t = {}
   for k,v in pairs(map_op) do
-    if sub(v, -1) == "." then
+    if type(v) == "string" and sub(v, -1) == "." then
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
     end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
 end
 
+local function parse_vr(expr)
+  local r = match(expr, "^v([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_vs(expr)
+  local r = match(expr, "^vs([1-6]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 63 then return r end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
 local function parse_cr(expr)
   local r = match(expr, "^cr([0-7])$")
   if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
   werror("bad condition bit name `"..expr.."'")
 end
 
+local parse_ctx = {}
+
+local loadenv = setfenv and function(s)
+  local code = loadstring(s, "")
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s)
+  return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+local function parse_number(n)
+  local x = tonumber(n)
+  if x then return x end
+  local code = loadenv("return "..n)
+  if code then
+    local ok, y = pcall(code)
+    if ok then return y end
+  end
+  return nil
+end
+
 local function parse_imm(imm, bits, shift, scale, signed)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
     local m = sar(n, scale)
     if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
       end
     end
     werror("out of range immediate `"..imm.."'")
-  elseif match(imm, "^r([1-3]?[0-9])$") or
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+	 match(imm, "^vs([1-6]?[0-9])$") or
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
   else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
 end
 
 local function parse_shiftmask(imm, isshift)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
     if shr(n, 6) == 0 then
-      local lsb = band(imm, 31)
-      local msb = imm - lsb
+      local lsb = band(n, 31)
+      local msb = n - lsb
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
     end
     werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
   else
-    werror("NYI: parameterized 64 bit shift/mask")
+    waction("IMMSH", isshift and 1 or 0, imm)
+    return 0;
   end
 end
 
@@ -1011,7 +1648,7 @@ end
 ------------------------------------------------------------------------------
 
 -- Handle opcodes defined with template strings.
-map_op[".template__"] = function(params, template, nparams)
+op_template = function(params, template, nparams)
   if not params then return sub(template, 9) end
   local op = tonumber(sub(template, 1, 8), 16)
   local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
     elseif p == "F" then
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+    elseif p == "V" then
+      rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+    elseif p == "Q" then
+      local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+      local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+      op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+    elseif p == "q" then
+      local vs = parse_vs(params[n]); n = n + 1
+      op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
     elseif p == "A" then
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
     elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
     elseif p == "X" then
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+    elseif p == "1" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+    elseif p == "g" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+    elseif p == "3" then
+      rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+    elseif p == "P" then
+      rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "p" then
+      op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "6" then
+      rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+    elseif p == "Y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+    elseif p == "y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+    elseif p == "Z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+    elseif p == "z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
     elseif p == "W" then
       op = op + parse_cr(params[n]); n = n + 1
     elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
       local lo = band(op, mm)
       local hi = band(op, shl(mm, 5))
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+    elseif p == ":" then
+      if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
     elseif p == "-" then
       rs = rs - 5
     elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
   wputpos(pos, op)
 end
 
+map_op[".template__"] = op_template
+
 ------------------------------------------------------------------------------
 
 -- Pseudo-opcode to mark the position where the action list is to be emitted.

+ 3 - 3
jni/LuaJIT-2.0.1/dynasm/dasm_proto.h → jni/LuaJIT-2.1/dynasm/dasm_proto.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM encoding engine prototypes.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -10,8 +10,8 @@
 #include <stddef.h>
 #include <stdarg.h>
 
-#define DASM_IDENT	"DynASM 1.3.0"
-#define DASM_VERSION	10300	/* 1.3.0 */
+#define DASM_IDENT	"DynASM 1.4.0"
+#define DASM_VERSION	10400	/* 1.4.0 */
 
 #ifndef Dst_DECL
 #define Dst_DECL	dasm_State **Dst

+ 1 - 1
jni/LuaJIT-2.0.1/dynasm/dasm_x64.lua → jni/LuaJIT-2.1/dynasm/dasm_x64.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM x64 module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 -- This module just sets 64 bit mode for the combined x86/x64 module.

+ 9 - 3
jni/LuaJIT-2.0.1/dynasm/dasm_x86.h → jni/LuaJIT-2.1/dynasm/dasm_x86.h

@@ -1,6 +1,6 @@
 /*
 ** DynASM x86 encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -213,7 +213,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_REL_LG:
       case DASM_IMM_LG:
 	n = *p++; pl = D->lglabels + n;
-	if (n <= 246) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl -= 246; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;
@@ -390,7 +391,12 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
 	case DASM_IMM_W: dasmw(n); break;
-	case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+	case DASM_VREG: {
+	  int t = *p++;
+	  if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3;
+	  cp[-1] ^= n;
+	  break;
+	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
 	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */

+ 366 - 80
jni/LuaJIT-2.0.1/dynasm/dasm_x86.lua → jni/LuaJIT-2.1/dynasm/dasm_x86.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM x86/x64 module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -11,9 +11,9 @@ local x64 = x64
 local _info = {
   arch =	x64 and "x64" or "x86",
   description =	"DynASM x86/x64 module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
 local _s = string
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
-local concat, sort = table.concat, table.sort
+local concat, sort, remove = table.concat, table.sort, table.remove
 local bit = bit or require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
 
 -- Inherited tables and callbacks.
 local g_opt, g_arch
@@ -299,7 +299,7 @@ local function mkrmap(sz, cl, names)
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     if needrex then map_reg_needrex[iname] = true end
     local name
-    if sz == "o" then name = format("xmm%d", i)
+    if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
     elseif sz == "f" then name = format("st%d", i)
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     map_archdef[name] = iname
@@ -334,21 +334,24 @@ mkrmap("f", "Rf")
 -- SSE registers (oword sized, but qword and dword accessible).
 mkrmap("o", "xmm")
 
+-- AVX registers (yword sized, but oword, qword and dword accessible).
+mkrmap("y", "ymm")
+
 -- Operand size prefixes to codes.
 local map_opsize = {
-  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
-  aword = addrsize,
+  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
+  tword = "t", aword = addrsize,
 }
 
 -- Operand size code to number.
 local map_opsizenum = {
-  b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
 }
 
 -- Operand size code to name.
 local map_opsizename = {
-  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
-  f = "fpword",
+  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
+  t = "tword", f = "fpword",
 }
 
 -- Valid index register scale factors.
@@ -460,7 +463,29 @@ local function wputszarg(sz, n)
 end
 
 -- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex)
+local function wputop(sz, op, rex, vex)
+  if vex then
+    local tail
+    if vex.m == 1 and band(rex, 11) == 0 then
+      wputb(0xc5)
+      tail = shl(bxor(band(rex, 4), 4), 5)
+    else
+      wputb(0xc4)
+      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
+      tail = shl(band(rex, 8), 4)
+    end
+    local reg, vreg = 0, nil
+    if vex.v then
+      reg = vex.v.reg
+      if not reg then werror("bad vex operand") end
+      if reg < 0 then reg = 0; vreg = vex.v.vreg end
+    end
+    if sz == "y" or vex.l then tail = tail + 4 end
+    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
+    if vreg then waction("VREG", vreg); wputxb(4) end
+    rex = 0
+    if op >= 256 then werror("bad vex opcode") end
+  end
   local r
   if rex ~= 0 and not x64 then werror("bad operand size") end
   if sz == "w" then wputb(102) end
@@ -881,9 +906,15 @@ end
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --             The spare 3 bits are either filled with the last hex digit or
 --             the result from a previous "r"/"R". The opcode is restored.
+--   "u"       Use VEX encoding, vvvv unused.
+--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
+--             removed from the list used by future characters).
+--   "L"       Force VEX.L
 --
 -- All of the following characters force a flush of the opcode:
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+--   "s"       stores a 4 bit immediate from the last register operand,
+--             followed by 4 zero bits.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
@@ -1040,7 +1071,7 @@ local map_op = {
   -- ED: *in Rdw,dx
   -- EE: *out dx,Rb
   -- EF: *out dx,Rdw
-  -- F0: *lock
+  lock_0 =	"F0",
   int1_0 =	"F1",
   repne_0 =	"F2",
   repnz_0 =	"F2",
@@ -1081,7 +1112,11 @@ local map_op = {
   btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
   bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
 
+  shld_3 =	"mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
+  shrd_3 =	"mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
+
   rdtsc_0 =	"0F31", -- P1+
+  rdpmc_0 =	"0F33", -- P6+
   cpuid_0 =	"0FA2", -- P1+
 
   -- floating point ops
@@ -1114,6 +1149,9 @@ local map_op = {
   fucompp_0 =	"DAE9",
   fcompp_0 =	"DED9",
 
+  fldenv_1 =	"x.:D94m",
+  fnstenv_1 =	"x.:D96m",
+  fstenv_1 =	"x.:9BD96m",
   fldcw_1 =	"xw:nD95m",
   fstcw_1 =	"xw:n9BD97m",
   fnstcw_1 =	"xw:nD97m",
@@ -1184,11 +1222,13 @@ local map_op = {
   cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
   cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
   cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
-  cvtss2si_2 =	"rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
+  cvtss2si_2 =	"rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
   cvttpd2dq_2 =	"rmo:660FE6rM",
   cvttps2dq_2 =	"rmo:F30F5BrM",
   cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
   cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
+  fxsave_1 =	"x.:0FAE0m",
+  fxrstor_1 =	"x.:0FAE1m",
   ldmxcsr_1 =	"xd:0FAE2m",
   lfence_0 =	"0FAEE8",
   maskmovdqu_2 = "rro:660FF7rM",
@@ -1217,46 +1257,14 @@ local map_op = {
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   orpd_2 =	"rmo:660F56rM",
   orps_2 =	"rmo:0F56rM",
-  packssdw_2 =	"rmo:660F6BrM",
-  packsswb_2 =	"rmo:660F63rM",
-  packuswb_2 =	"rmo:660F67rM",
-  paddb_2 =	"rmo:660FFCrM",
-  paddd_2 =	"rmo:660FFErM",
-  paddq_2 =	"rmo:660FD4rM",
-  paddsb_2 =	"rmo:660FECrM",
-  paddsw_2 =	"rmo:660FEDrM",
-  paddusb_2 =	"rmo:660FDCrM",
-  paddusw_2 =	"rmo:660FDDrM",
-  paddw_2 =	"rmo:660FFDrM",
-  pand_2 =	"rmo:660FDBrM",
-  pandn_2 =	"rmo:660FDFrM",
   pause_0 =	"F390",
-  pavgb_2 =	"rmo:660FE0rM",
-  pavgw_2 =	"rmo:660FE3rM",
-  pcmpeqb_2 =	"rmo:660F74rM",
-  pcmpeqd_2 =	"rmo:660F76rM",
-  pcmpeqw_2 =	"rmo:660F75rM",
-  pcmpgtb_2 =	"rmo:660F64rM",
-  pcmpgtd_2 =	"rmo:660F66rM",
-  pcmpgtw_2 =	"rmo:660F65rM",
-  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
+  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
-  pmaddwd_2 =	"rmo:660FF5rM",
-  pmaxsw_2 =	"rmo:660FEErM",
-  pmaxub_2 =	"rmo:660FDErM",
-  pminsw_2 =	"rmo:660FEArM",
-  pminub_2 =	"rmo:660FDArM",
   pmovmskb_2 =	"rr/do:660FD7rM",
-  pmulhuw_2 =	"rmo:660FE4rM",
-  pmulhw_2 =	"rmo:660FE5rM",
-  pmullw_2 =	"rmo:660FD5rM",
-  pmuludq_2 =	"rmo:660FF4rM",
-  por_2 =	"rmo:660FEBrM",
   prefetchnta_1 = "xb:n0F180m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht2_1 = "xb:n0F183m",
-  psadbw_2 =	"rmo:660FF6rM",
   pshufd_3 =	"rmio:660F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
@@ -1270,23 +1278,6 @@ local map_op = {
   psrldq_2 =	"rio:660F733mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
-  psubb_2 =	"rmo:660FF8rM",
-  psubd_2 =	"rmo:660FFArM",
-  psubq_2 =	"rmo:660FFBrM",
-  psubsb_2 =	"rmo:660FE8rM",
-  psubsw_2 =	"rmo:660FE9rM",
-  psubusb_2 =	"rmo:660FD8rM",
-  psubusw_2 =	"rmo:660FD9rM",
-  psubw_2 =	"rmo:660FF9rM",
-  punpckhbw_2 =	"rmo:660F68rM",
-  punpckhdq_2 =	"rmo:660F6ArM",
-  punpckhqdq_2 = "rmo:660F6DrM",
-  punpckhwd_2 =	"rmo:660F69rM",
-  punpcklbw_2 =	"rmo:660F60rM",
-  punpckldq_2 =	"rmo:660F62rM",
-  punpcklqdq_2 = "rmo:660F6CrM",
-  punpcklwd_2 =	"rmo:660F61rM",
-  pxor_2 =	"rmo:660FEFrM",
   rcpps_2 =	"rmo:0F53rM",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rsqrtps_2 =	"rmo:0F52rM",
@@ -1344,7 +1335,7 @@ local map_op = {
   dpps_3 =	"rmio:660F3A40rMU",
   extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
   insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
-  movntdqa_2 =	"rmo:660F382ArM",
+  movntdqa_2 =	"rxo:660F382ArM",
   mpsadbw_3 =	"rmio:660F3A42rMU",
   packusdw_2 =	"rmo:660F382BrM",
   pblendvb_3 =	"rmRo:660F3810rM",
@@ -1404,6 +1395,232 @@ local map_op = {
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   -- popcnt is also in SSE4.2
+
+  -- AES-NI
+  aesdec_2 =	"rmo:660F38DErM",
+  aesdeclast_2 = "rmo:660F38DFrM",
+  aesenc_2 =	"rmo:660F38DCrM",
+  aesenclast_2 = "rmo:660F38DDrM",
+  aesimc_2 =	"rmo:660F38DBrM",
+  aeskeygenassist_3 = "rmio:660F3ADFrMU",
+  pclmulqdq_3 =	"rmio:660F3A44rMU",
+
+   -- AVX FP ops
+  vaddsubpd_3 =	"rrmoy:660FVD0rM",
+  vaddsubps_3 =	"rrmoy:F20FVD0rM",
+  vandpd_3 =	"rrmoy:660FV54rM",
+  vandps_3 =	"rrmoy:0FV54rM",
+  vandnpd_3 =	"rrmoy:660FV55rM",
+  vandnps_3 =	"rrmoy:0FV55rM",
+  vblendpd_4 =	"rrmioy:660F3AV0DrMU",
+  vblendps_4 =	"rrmioy:660F3AV0CrMU",
+  vblendvpd_4 =	"rrmroy:660F3AV4BrMs",
+  vblendvps_4 =	"rrmroy:660F3AV4ArMs",
+  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
+  vcmppd_4 =	"rrmioy:660FVC2rMU",
+  vcmpps_4 =	"rrmioy:0FVC2rMU",
+  vcmpsd_4 =	"rrrio:F20FVC2rMU|rrxi/ooq:",
+  vcmpss_4 =	"rrrio:F30FVC2rMU|rrxi/ood:",
+  vcomisd_2 =	"rro:660Fu2FrM|rx/oq:",
+  vcomiss_2 =	"rro:0Fu2FrM|rx/od:",
+  vcvtdq2pd_2 =	"rro:F30FuE6rM|rx/oq:|rm/yo:",
+  vcvtdq2ps_2 =	"rmoy:0Fu5BrM",
+  vcvtpd2dq_2 =	"rmoy:F20FuE6rM",
+  vcvtpd2ps_2 =	"rmoy:660Fu5ArM",
+  vcvtps2dq_2 =	"rmoy:660Fu5BrM",
+  vcvtps2pd_2 =	"rro:0Fu5ArM|rx/oq:|rm/yo:",
+  vcvtsd2si_2 =	"rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
+  vcvtsd2ss_3 =	"rrro:F20FV5ArM|rrx/ooq:",
+  vcvtsi2sd_3 =	"rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
+  vcvtsi2ss_3 =	"rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
+  vcvtss2sd_3 =	"rrro:F30FV5ArM|rrx/ood:",
+  vcvtss2si_2 =	"rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
+  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
+  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
+  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
+  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
+  vdppd_4 =	"rrmio:660F3AV41rMU",
+  vdpps_4 =	"rrmioy:660F3AV40rMU",
+  vextractf128_3 = "mri/oy:660F3AuL19RmU",
+  vextractps_3 = "mri/do:660F3Au17RmU",
+  vhaddpd_3 =	"rrmoy:660FV7CrM",
+  vhaddps_3 =	"rrmoy:F20FV7CrM",
+  vhsubpd_3 =	"rrmoy:660FV7DrM",
+  vhsubps_3 =	"rrmoy:F20FV7DrM",
+  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
+  vinsertps_4 =	"rrrio:660F3AV21rMU|rrxi/ood:",
+  vldmxcsr_1 =	"xd:0FuAE2m",
+  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
+  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
+  vmovapd_2 =	"rmoy:660Fu28rM|mroy:660Fu29Rm",
+  vmovaps_2 =	"rmoy:0Fu28rM|mroy:0Fu29Rm",
+  vmovd_2 =	"rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
+  vmovq_2 =	"rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
+  vmovddup_2 =	"rmy:F20Fu12rM|rro:|rx/oq:",
+  vmovhlps_3 =	"rrro:0FV12rM",
+  vmovhpd_2 =	"xr/qo:660Fu17Rm",
+  vmovhpd_3 =	"rrx/ooq:660FV16rM",
+  vmovhps_2 =	"xr/qo:0Fu17Rm",
+  vmovhps_3 =	"rrx/ooq:0FV16rM",
+  vmovlhps_3 =	"rrro:0FV16rM",
+  vmovlpd_2 =	"xr/qo:660Fu13Rm",
+  vmovlpd_3 =	"rrx/ooq:660FV12rM",
+  vmovlps_2 =	"xr/qo:0Fu13Rm",
+  vmovlps_3 =	"rrx/ooq:0FV12rM",
+  vmovmskpd_2 =	"rr/do:660Fu50rM|rr/dy:660FuL50rM",
+  vmovmskps_2 =	"rr/do:0Fu50rM|rr/dy:0FuL50rM",
+  vmovntpd_2 =	"xroy:660Fu2BRm",
+  vmovntps_2 =	"xroy:0Fu2BRm",
+  vmovsd_2 =	"rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
+  vmovsd_3 =	"rrro:F20FV10rM",
+  vmovshdup_2 =	"rmoy:F30Fu16rM",
+  vmovsldup_2 =	"rmoy:F30Fu12rM",
+  vmovss_2 =	"rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
+  vmovss_3 =	"rrro:F30FV10rM",
+  vmovupd_2 =	"rmoy:660Fu10rM|mroy:660Fu11Rm",
+  vmovups_2 =	"rmoy:0Fu10rM|mroy:0Fu11Rm",
+  vorpd_3 =	"rrmoy:660FV56rM",
+  vorps_3 =	"rrmoy:0FV56rM",
+  vpermilpd_3 =	"rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
+  vpermilps_3 =	"rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
+  vperm2f128_4 = "rrmiy:660F3AV06rMU",
+  vptestpd_2 =	"rmoy:660F38u0FrM",
+  vptestps_2 =	"rmoy:660F38u0ErM",
+  vrcpps_2 =	"rmoy:0Fu53rM",
+  vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
+  vrsqrtps_2 =	"rmoy:0Fu52rM",
+  vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
+  vroundpd_3 =	"rmioy:660F3AV09rMU",
+  vroundps_3 =	"rmioy:660F3AV08rMU",
+  vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
+  vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
+  vshufpd_4 =	"rrmioy:660FVC6rMU",
+  vshufps_4 =	"rrmioy:0FVC6rMU",
+  vsqrtps_2 =	"rmoy:0Fu51rM",
+  vsqrtss_2 =	"rro:F30Fu51rM|rx/od:",
+  vsqrtpd_2 =	"rmoy:660Fu51rM",
+  vsqrtsd_2 =	"rro:F20Fu51rM|rx/oq:",
+  vstmxcsr_1 =	"xd:0FuAE3m",
+  vucomisd_2 =	"rro:660Fu2ErM|rx/oq:",
+  vucomiss_2 =	"rro:0Fu2ErM|rx/od:",
+  vunpckhpd_3 =	"rrmoy:660FV15rM",
+  vunpckhps_3 =	"rrmoy:0FV15rM",
+  vunpcklpd_3 =	"rrmoy:660FV14rM",
+  vunpcklps_3 =	"rrmoy:0FV14rM",
+  vxorpd_3 =	"rrmoy:660FV57rM",
+  vxorps_3 =	"rrmoy:0FV57rM",
+  vzeroall_0 =	"0FuL77",
+  vzeroupper_0 = "0Fu77",
+
+  -- AVX2 FP ops
+  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
+  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
+  -- *vgather* (!vsib)
+  vpermpd_3 =	"rmiy:660F3AuX01rMU",
+  vpermps_3 =	"rrmy:660F38V16rM",
+
+  -- AVX, AVX2 integer ops
+  -- In general, xmm requires AVX, ymm requires AVX2.
+  vlddqu_2 =	"rxoy:F20FuF0rM",
+  vmaskmovdqu_2 = "rro:660FuF7rM",
+  vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
+  vmovdqu_2 =	"rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
+  vmovntdq_2 =	"xroy:660FuE7Rm",
+  vmovntdqa_2 =	"rxoy:660F38u2ArM",
+  vmpsadbw_4 =	"rrmioy:660F3AV42rMU",
+  vpabsb_2 =	"rmoy:660F38u1CrM",
+  vpabsd_2 =	"rmoy:660F38u1ErM",
+  vpabsw_2 =	"rmoy:660F38u1DrM",
+  vpackusdw_3 =	"rrmoy:660F38V2BrM",
+  vpalignr_4 =	"rrmioy:660F3AV0FrMU",
+  vpblendvb_4 =	"rrmroy:660F3AV4CrMs",
+  vpblendw_4 =	"rrmioy:660F3AV0ErMU",
+  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
+  vpcmpeqq_3 =	"rrmoy:660F38V29rM",
+  vpcmpestri_3 = "rmio:660F3Au61rMU",
+  vpcmpestrm_3 = "rmio:660F3Au60rMU",
+  vpcmpgtq_3 =	"rrmoy:660F38V37rM",
+  vpcmpistri_3 = "rmio:660F3Au63rMU",
+  vpcmpistrm_3 = "rmio:660F3Au62rMU",
+  vpextrb_3 =	"rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
+  vpextrw_3 =	"rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
+  vpextrd_3 =	"mri/do:660F3Au16RmU",
+  vpextrq_3 =	"mri/qo:660F3Au16RmU",
+  vphaddw_3 =	"rrmoy:660F38V01rM",
+  vphaddd_3 =	"rrmoy:660F38V02rM",
+  vphaddsw_3 =	"rrmoy:660F38V03rM",
+  vphminposuw_2 = "rmo:660F38u41rM",
+  vphsubw_3 =	"rrmoy:660F38V05rM",
+  vphsubd_3 =	"rrmoy:660F38V06rM",
+  vphsubsw_3 =	"rrmoy:660F38V07rM",
+  vpinsrb_4 =	"rrri/ood:660F3AV20rMU|rrxi/oob:",
+  vpinsrw_4 =	"rrri/ood:660FVC4rMU|rrxi/oow:",
+  vpinsrd_4 =	"rrmi/ood:660F3AV22rMU",
+  vpinsrq_4 =	"rrmi/ooq:660F3AVX22rMU",
+  vpmaddubsw_3 = "rrmoy:660F38V04rM",
+  vpmaxsb_3 =	"rrmoy:660F38V3CrM",
+  vpmaxsd_3 =	"rrmoy:660F38V3DrM",
+  vpmaxuw_3 =	"rrmoy:660F38V3ErM",
+  vpmaxud_3 =	"rrmoy:660F38V3FrM",
+  vpminsb_3 =	"rrmoy:660F38V38rM",
+  vpminsd_3 =	"rrmoy:660F38V39rM",
+  vpminuw_3 =	"rrmoy:660F38V3ArM",
+  vpminud_3 =	"rrmoy:660F38V3BrM",
+  vpmovmskb_2 =	"rr/do:660FuD7rM|rr/dy:660FuLD7rM",
+  vpmovsxbw_2 =	"rroy:660F38u20rM|rx/oq:|rx/yo:",
+  vpmovsxbd_2 =	"rroy:660F38u21rM|rx/od:|rx/yq:",
+  vpmovsxbq_2 =	"rroy:660F38u22rM|rx/ow:|rx/yd:",
+  vpmovsxwd_2 =	"rroy:660F38u23rM|rx/oq:|rx/yo:",
+  vpmovsxwq_2 =	"rroy:660F38u24rM|rx/od:|rx/yq:",
+  vpmovsxdq_2 =	"rroy:660F38u25rM|rx/oq:|rx/yo:",
+  vpmovzxbw_2 =	"rroy:660F38u30rM|rx/oq:|rx/yo:",
+  vpmovzxbd_2 =	"rroy:660F38u31rM|rx/od:|rx/yq:",
+  vpmovzxbq_2 =	"rroy:660F38u32rM|rx/ow:|rx/yd:",
+  vpmovzxwd_2 =	"rroy:660F38u33rM|rx/oq:|rx/yo:",
+  vpmovzxwq_2 =	"rroy:660F38u34rM|rx/od:|rx/yq:",
+  vpmovzxdq_2 =	"rroy:660F38u35rM|rx/oq:|rx/yo:",
+  vpmuldq_3 =	"rrmoy:660F38V28rM",
+  vpmulhrsw_3 =	"rrmoy:660F38V0BrM",
+  vpmulld_3 =	"rrmoy:660F38V40rM",
+  vpshufb_3 =	"rrmoy:660F38V00rM",
+  vpshufd_3 =	"rmioy:660Fu70rMU",
+  vpshufhw_3 =	"rmioy:F30Fu70rMU",
+  vpshuflw_3 =	"rmioy:F20Fu70rMU",
+  vpsignb_3 =	"rrmoy:660F38V08rM",
+  vpsignw_3 =	"rrmoy:660F38V09rM",
+  vpsignd_3 =	"rrmoy:660F38V0ArM",
+  vpslldq_3 =	"rrioy:660Fv737mU",
+  vpsllw_3 =	"rrmoy:660FVF1rM|rrioy:660Fv716mU",
+  vpslld_3 =	"rrmoy:660FVF2rM|rrioy:660Fv726mU",
+  vpsllq_3 =	"rrmoy:660FVF3rM|rrioy:660Fv736mU",
+  vpsraw_3 =	"rrmoy:660FVE1rM|rrioy:660Fv714mU",
+  vpsrad_3 =	"rrmoy:660FVE2rM|rrioy:660Fv724mU",
+  vpsrldq_3 =	"rrioy:660Fv733mU",
+  vpsrlw_3 =	"rrmoy:660FVD1rM|rrioy:660Fv712mU",
+  vpsrld_3 =	"rrmoy:660FVD2rM|rrioy:660Fv722mU",
+  vpsrlq_3 =	"rrmoy:660FVD3rM|rrioy:660Fv732mU",
+  vptest_2 =	"rmoy:660F38u17rM",
+
+  -- AVX2 integer ops
+  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
+  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
+  vextracti128_3 = "mri/oy:660F3AuL39RmU",
+  vpblendd_4 =	"rrmioy:660F3AV02rMU",
+  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
+  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
+  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
+  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
+  vpermd_3 =	"rrmy:660F38V36rM",
+  vpermq_3 =	"rmiy:660F3AuX00rMU",
+  -- *vpgather* (!vsib)
+  vperm2i128_4 = "rrmiy:660F3AV46rMU",
+  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
+  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
+  vpsllvd_3 =	"rrmoy:660F38V47rM",
+  vpsllvq_3 =	"rrmoy:660F38VX47rM",
+  vpsravd_3 =	"rrmoy:660F38V46rM",
+  vpsrlvd_3 =	"rrmoy:660F38V45rM",
+  vpsrlvq_3 =	"rrmoy:660F38VX45rM",
 }
 
 ------------------------------------------------------------------------------
@@ -1454,28 +1671,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
 end
 
--- SSE FP arithmetic ops.
+-- SSE / AVX FP arithmetic ops.
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 		     sub = 12, min = 13, div = 14, max = 15 } do
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+  if n ~= 1 then
+    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
+    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
+    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
+    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
+  end
+end
+
+-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
+for name,n in pairs{
+  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
+  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
+  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
+  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
+  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
+  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
+  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
+  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
+  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
+  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
+  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
+  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
+  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
+  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
+  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
+} do
+  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
+  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
 end
 
 ------------------------------------------------------------------------------
 
+local map_vexarg = { u = false, v = 1, V = 2 }
+
 -- Process pattern string.
 local function dopattern(pat, args, sz, op, needrex)
-  local digit, addin
+  local digit, addin, vex
   local opcode = 0
   local szov = sz
   local narg = 1
   local rex = 0
 
   -- Limit number of section buffer positions used by a single dasm_put().
-  -- A single opcode needs a maximum of 5 positions.
-  if secpos+5 > maxsecpos then wflush() end
+  -- A single opcode needs a maximum of 6 positions.
+  if secpos+6 > maxsecpos then wflush() end
 
   -- Process each character.
   for c in gmatch(pat.."|", ".") do
@@ -1489,6 +1736,8 @@ local function dopattern(pat, args, sz, op, needrex)
       szov = nil
     elseif c == "X" then	-- Force REX.W.
       rex = 8
+    elseif c == "L" then	-- Force VEX.L.
+      vex.l = true
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       if narg < 2 then narg = 2 end
@@ -1512,21 +1761,41 @@ local function dopattern(pat, args, sz, op, needrex)
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if s > 7 then rex = rex + 4 end
       if needrex then rex = rex + 16 end
-      wputop(szov, opcode, rex); opcode = nil
+      wputop(szov, opcode, rex, vex); opcode = nil
       local imark = sub(pat, -1) -- Force a mark (ugly).
       -- Put ModRM/SIB with regno/last digit as spare.
       wputmrmsib(t, imark, s, addin and addin.vreg)
       addin = nil
+    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
+      local b = band(opcode, 255); opcode = shr(opcode, 8)
+      local m = 1
+      if b == 0x38 then m = 2
+      elseif b == 0x3a then m = 3 end
+      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
+      if b ~= 0x0f then
+	werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
+	  "' in pattern `"..pat.."' for `"..op.."'")
+      end
+      local v = map_vexarg[c]
+      if v then v = remove(args, v) end
+      b = band(opcode, 255)
+      local p = 0
+      if b == 0x66 then p = 1
+      elseif b == 0xf3 then p = 2
+      elseif b == 0xf2 then p = 3 end
+      if p ~= 0 then opcode = shr(opcode, 8) end
+      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
+      vex = { m = m, p = p, v = v }
     else
       if opcode then -- Flush opcode.
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if needrex then rex = rex + 16 end
 	if addin and addin.reg == -1 then
-	  wputop(szov, opcode - 7, rex)
+	  wputop(szov, opcode - 7, rex, vex)
 	  waction("VREG", addin.vreg); wputxb(0)
 	else
 	  if addin and addin.reg > 7 then rex = rex + 1 end
-	  wputop(szov, opcode, rex)
+	  wputop(szov, opcode, rex, vex)
 	end
 	opcode = nil
       end
@@ -1563,6 +1832,14 @@ local function dopattern(pat, args, sz, op, needrex)
 	  else
 	    wputlabel("REL_", imm, 2)
 	  end
+	elseif c == "s" then
+	  local reg = a.reg
+	  if reg < 0 then
+	    wputb(0)
+	    waction("VREG", a.vreg); wputxb(5)
+	  else
+	    wputb(shl(reg, 4))
+	  end
 	else
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	end
@@ -1639,11 +1916,14 @@ map_op[".template__"] = function(params, template, nparams)
     if pat == "" then pat = lastpat else lastpat = pat end
     if matchtm(tm, args) then
       local prefix = sub(szm, 1, 1)
-      if prefix == "/" then -- Match both operand sizes.
-	if args[1].opsize == sub(szm, 2, 2) and
-	   args[2].opsize == sub(szm, 3, 3) then
-	  dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
-	  return
+      if prefix == "/" then -- Exactly match leading operand sizes.
+	for i = #szm,1,-1 do
+	  if i == 1 then
+	    dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+	    return
+	  elseif args[i-1].opsize ~= sub(szm, i, i) then
+	    break
+	  end
 	end
       else -- Match common operand size.
 	local szp = sz
@@ -1678,7 +1958,7 @@ if x64 then
   function map_op.mov64_2(params)
     if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
     if secpos+2 > maxsecpos then wflush() end
-    local opcode, op64, sz, rex
+    local opcode, op64, sz, rex, vreg
     local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
     if op64 then
       local a = parseoperand(params[2])
@@ -1699,11 +1979,17 @@ if x64 then
 	  werror("bad operand mode")
 	end
 	op64 = params[2]
-	opcode = 0xb8 + band(a.reg, 7) -- !x64: no VREG support.
+	if a.reg == -1 then
+	  vreg = a.vreg
+	  opcode = 0xb8
+	else
+	  opcode = 0xb8 + band(a.reg, 7)
+	end
 	rex = a.reg > 7 and 9 or 8
       end
     end
     wputop(sz, opcode, rex)
+    if vreg then waction("VREG", vreg); wputxb(0) end
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
   end

+ 10 - 11
jni/LuaJIT-2.0.1/dynasm/dynasm.lua → jni/LuaJIT-2.1/dynasm/dynasm.lua

@@ -2,7 +2,7 @@
 -- DynASM. A dynamic assembler for code generation engines.
 -- Originally designed and implemented for LuaJIT.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See below for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -10,14 +10,14 @@
 local _info = {
   name =	"DynASM",
   description =	"A dynamic assembler for code generation engines",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   url =		"http://luajit.org/dynasm.html",
   license =	"MIT",
   copyright =	[[
-Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -85,7 +85,7 @@ end
 -- Resync CPP line numbers.
 local function wsync()
   if g_synclineno ~= g_lineno and g_opt.cpp then
-    wline("# "..g_lineno..' "'..g_fname..'"')
+    wline("#line "..g_lineno..' "'..g_fname..'"')
     g_synclineno = g_lineno
   end
 end
@@ -695,6 +695,9 @@ map_op[".arch_1"] = function(params)
   if not params then return "name" end
   local err = loadarch(params[1])
   if err then wfatal(err) end
+  wline(format("#if DASM_VERSION != %d", _info.vernum))
+  wline('#error "Version mismatch between DynASM and included encoding engine"')
+  wline("#endif")
 end
 
 -- Dummy .arch pseudo-opcode to improve the error report.
@@ -877,13 +880,9 @@ local function dasmhead(out)
 ** DO NOT EDIT! The original file is in "%s".
 */
 
-#if DASM_VERSION != %d
-#error "Version mismatch between DynASM and included encoding engine"
-#endif
-
 ]], _info.url,
     _info.version, g_arch._info.arch, g_arch._info.version,
-    g_fname, _info.vernum))
+    g_fname))
 end
 
 -- Read input file.

+ 1 - 1
jni/LuaJIT-2.0.1/etc/luajit.1 → jni/LuaJIT-2.1/etc/luajit.1

@@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
 Runs some nested loops and shows the resulting traces.
 .SH COPYRIGHT
 .PP
-\fBLuaJIT\fR is Copyright \(co 2005-2013 Mike Pall.
+\fBLuaJIT\fR is Copyright \(co 2005-2015 Mike Pall.
 .br
 \fBLuaJIT\fR is open source software, released under the MIT license.
 .SH SEE ALSO

+ 6 - 5
jni/LuaJIT-2.0.1/etc/luajit.pc → jni/LuaJIT-2.1/etc/luajit.pc

@@ -1,18 +1,19 @@
 # Package information for LuaJIT to be used by pkg-config.
 majver=2
-minver=0
-relver=1
-version=${majver}.${minver}.${relver}
+minver=1
+relver=0
+version=${majver}.${minver}.${relver}-beta1
 abiver=5.1
 
 prefix=/usr/local
+multilib=lib
 exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
+libdir=${exec_prefix}/${multilib}
 libname=luajit-${abiver}
 includedir=${prefix}/include/luajit-${majver}.${minver}
 
 INSTALL_LMOD=${prefix}/share/lua/${abiver}
-INSTALL_CMOD=${prefix}/lib/lua/${abiver}
+INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver}
 
 Name: LuaJIT
 Description: Just-in-time compiler for Lua

+ 1 - 1
jni/LuaJIT-2.0.1/src/.gitignore → jni/LuaJIT-2.1/src/.gitignore

@@ -4,4 +4,4 @@ lj_ffdef.h
 lj_libdef.h
 lj_recdef.h
 lj_folddef.h
-lj_vm.s
+lj_vm.[sS]

+ 90 - 59
jni/LuaJIT-2.0.1/src/Makefile → jni/LuaJIT-2.1/src/Makefile

@@ -7,11 +7,11 @@
 # Also works with MinGW and Cygwin on Windows.
 # Please check msvcbuild.bat for building with MSVC on Windows.
 #
-# Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 
 MAJVER=  2
-MINVER=  0
+MINVER=  1
 RELVER=  0
 ABIVER=  5.1
 NODOTABIVER= 51
@@ -24,11 +24,13 @@ NODOTABIVER= 51
 # removing the '#' in front of them. Make sure you force a full recompile
 # with "make clean", followed by "make" if you change any options.
 #
+DEFAULT_CC = gcc
+#
 # LuaJIT builds as a native 32 or 64 bit binary by default.
-CC= gcc
+CC= $(DEFAULT_CC)
 #
 # Use this if you want to force a 32 bit build on a 64 bit multilib OS.
-#CC= gcc -m32
+#CC= $(DEFAULT_CC) -m32
 #
 # Since the assembler part does NOT maintain a frame pointer, it's pointless
 # to slow down the C part by not omitting it. Debugging, tracebacks and
@@ -42,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer
 #
 # Target-specific compiler options:
 #
-# x86 only: it's recommended to compile at least for i686. Better yet,
-# compile for an architecture that has SSE2, too (-msse -msse2).
-#
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
 # the binaries to a different machine you could also use: -march=native
 #
-CCOPT_x86= -march=i686
+CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
 CCOPT_x64=
 CCOPT_arm=
+CCOPT_arm64=
 CCOPT_ppc=
-CCOPT_ppcspe=
 CCOPT_mips=
 #
 CCDEBUG=
@@ -122,8 +121,10 @@ XCFLAGS=
 #
 # Use the system provided memory allocator (realloc) instead of the
 # bundled memory allocator. This is slower, but sometimes helpful for
-# debugging. It's helpful for Valgrind's memcheck tool, too. This option
-# cannot be enabled on x64, since the built-in allocator is mandatory.
+# debugging. This option cannot be enabled on x64, since realloc usually
+# doesn't return addresses in the right address range.
+# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
+# the only way to get useful results from it for all other architectures.
 #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
 #
 # This define is required to run LuaJIT under Valgrind. The Valgrind
@@ -148,6 +149,29 @@ XCFLAGS=
 # You probably don't need to change anything below this line!
 ##############################################################################
 
+##############################################################################
+# Host system detection.
+##############################################################################
+
+ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
+  HOST_SYS= Windows
+  HOST_RM= del
+else
+  HOST_SYS:= $(shell uname -s)
+  ifneq (,$(findstring MINGW,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= mingw
+  endif
+  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= cygwin
+  endif
+  # Use Clang for OSX host.
+  ifeq (Darwin,$(HOST_SYS))
+    DEFAULT_CC= clang
+  endif
+endif
+
 ##############################################################################
 # Flags and options for host and target.
 ##############################################################################
@@ -163,7 +187,8 @@ XCFLAGS=
 #   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
 #   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 
-CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 
 HOST_CC= $(CC)
@@ -188,9 +213,10 @@ TARGET_LD= $(CROSS)$(CC)
 TARGET_AR= $(CROSS)ar rcus
 TARGET_STRIP= $(CROSS)strip
 
-TARGET_SONAME= libluajit.so
+TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
+TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
 TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
-TARGET_DYLIBPATH= $(or $(PREFIX),/usr/local)/lib/$(TARGET_DYLIBNAME)
+TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
 TARGET_DLLNAME= lua$(NODOTABIVER).dll
 TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
 TARGET_DYNXLDOPTS=
@@ -201,6 +227,7 @@ TARGET_XLDFLAGS=
 TARGET_XLIBS= -lm
 TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -215,12 +242,17 @@ else
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
   TARGET_LJARCH= arm
 else
+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= arm64
+else
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
+  else
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
+  endif
   TARGET_LJARCH= ppc
 else
-ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
-  TARGET_LJARCH= ppcspe
-else
 ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
     TARGET_ARCH= -D__MIPSEL__=1
@@ -239,9 +271,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
   TARGET_SYS= PS3
   TARGET_ARCH+= -D__CELLOS_LV2__
   TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
-endif
-ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+  TARGET_XLIBS+= -lpthread
 endif
 
 TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -249,45 +279,38 @@ TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH))
 
 ifneq (,$(PREFIX))
 ifneq (/usr/local,$(PREFIX))
-  TARGET_XCFLAGS+= -DLUA_XROOT=\"$(PREFIX)/\"
+  TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\"
   ifneq (/usr,$(PREFIX))
-    TARGET_DYNXLDOPTS= -Wl,-rpath,$(PREFIX)/lib
+    TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH)
   endif
 endif
 endif
+ifneq (,$(MULTILIB))
+  TARGET_XCFLAGS+= -DLUA_MULTILIB=\"$(MULTILIB)\"
+endif
+ifneq (,$(LMULTILIB))
+  TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
+endif
 
 ##############################################################################
-# System detection.
+# Target system detection.
 ##############################################################################
 
-ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
-  HOST_SYS= Windows
-  HOST_RM= del
-else
-  HOST_SYS:= $(shell uname -s)
-  ifneq (,$(findstring MINGW,$(HOST_SYS)))
-    HOST_SYS= Windows
-    HOST_MSYS= mingw
-  endif
-  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
-    HOST_SYS= Windows
-    HOST_MSYS= cygwin
-  endif
-endif
-
 TARGET_SYS?= $(HOST_SYS)
 ifeq (Windows,$(TARGET_SYS))
   TARGET_STRIP+= --strip-unneeded
   TARGET_XSHLDFLAGS= -shared
   TARGET_DYNXLDOPTS=
 else
+ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
+  TARGET_XCFLAGS+= -fno-stack-protector
+endif
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
   TARGET_AR+= 2>/dev/null
-  TARGET_XCFLAGS+= -fno-stack-protector
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -299,14 +322,13 @@ else
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
   TARGET_AR+= 2>/dev/null
-  TARGET_XCFLAGS+= -fno-stack-protector
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-else
-  ifneq (,$(findstring stack-protector,$(shell $(TARGET_CC) -dumpspecs)))
-    TARGET_XCFLAGS+= -fno-stack-protector
+  ifeq (arm64,$(TARGET_LJARCH))
+    TARGET_XCFLAGS+= -fno-omit-frame-pointer
   endif
+else
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
       TARGET_XLDFLAGS+= -Wl,-E
@@ -390,17 +412,18 @@ ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH)))
 else
   TARGET_ARCH+= -DLJ_ABI_SOFTFP=1
 endif
+ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D NO_UNWIND
+  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN
 endif
-ifeq (x86,$(TARGET_LJARCH))
-  ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
-    DASM_AFLAGS+= -D SSE
-  endif
-else
 ifeq (x64,$(TARGET_LJARCH))
-  DASM_ARCH= x86
+  ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
+    DASM_ARCH= x86
+  endif
 else
 ifeq (arm,$(TARGET_LJARCH))
   ifeq (iOS,$(TARGET_SYS))
@@ -414,13 +437,15 @@ ifeq (ppc,$(TARGET_LJARCH))
   ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D ROUND
   endif
-  ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D GPR64
   endif
   ifeq (PS3,$(TARGET_SYS))
     DASM_AFLAGS+= -D PPE -D TOC
   endif
-endif
+  ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+    DASM_ARCH= ppc64
+  endif
 endif
 endif
 endif
@@ -436,7 +461,7 @@ BUILDVM_X= $(BUILDVM_T)
 HOST_O= $(MINILUA_O) $(BUILDVM_O)
 HOST_T= $(MINILUA_T) $(BUILDVM_T)
 
-LJVM_S= lj_vm.s
+LJVM_S= lj_vm.S
 LJVM_O= lj_vm.o
 LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
@@ -445,10 +470,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
 	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 
-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
+LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_strfmt.o lj_api.o lj_profile.o \
+	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -487,7 +513,8 @@ TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
 
 ifeq (Windows,$(TARGET_SYS))
   TARGET_DYNCC= $(STATIC_CC)
-  LJVM_MODE= coffasm
+  LJVM_MODE= peobj
+  LJVM_BOUT= $(LJVM_O)
   LUAJIT_T= luajit.exe
   ifeq (cygwin,$(HOST_MSYS))
     LUAJIT_SO= cyg$(TARGET_DLLNAME)
@@ -567,6 +594,10 @@ amalg:
 clean:
 	$(HOST_RM) $(ALL_RM)
 
+libbc:
+	./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
+	$(MAKE) all
+
 depend:
 	@for file in $(ALL_HDRGEN); do \
 	  test -f $$file || touch $$file; \
@@ -581,7 +612,7 @@ depend:
 	  test -s $$file || $(HOST_RM) $$file; \
 	  done
 
-.PHONY: default all amalg clean depend
+.PHONY: default all amalg clean libbc depend
 
 ##############################################################################
 # Rules for generated files.
@@ -638,10 +669,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
 	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
 	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
 
-%.o: %.s
+%.o: %.S
 	$(E) "ASM       $@"
-	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
-	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+	$(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
+	$(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
 
 $(LUAJIT_O):
 	$(E) "CC        $@"

+ 111 - 93
jni/LuaJIT-2.0.1/src/Makefile.dep → jni/LuaJIT-2.1/src/Makefile.dep

@@ -5,43 +5,47 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
  lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_lib.h lj_libdef.h
+ lj_strfmt.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
- lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+ lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
+ lj_libdef.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h lj_ffdef.h \
- lj_lib.h lj_libdef.h
-lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
- lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
- lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
- lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
+ lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
+ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
 lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
 lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
 lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
 lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
- lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \
- lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
+ lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
- lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
 lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \
- lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
+ lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
+ lj_strfmt.h
 lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \
- lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+ lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
+ lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_carith.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
+ lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
+ lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,107 +78,116 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
 lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_clib.h
+ lj_cdata.h lj_clib.h lj_strfmt.h
 lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
- lj_bc.h lj_vm.h lj_char.h lj_strscan.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
+ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
+ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
- lj_crecord.h
+ lj_crecord.h lj_strfmt.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
+ lj_ccallback.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
- lj_bc.h lj_jit.h lj_ir.h
+ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
- lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
- lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_vm.h luajit.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
+ lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
+ lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h
+ lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_recdef.h
+ lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \
- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \
- lj_ir.h lj_dispatch.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
+ lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_vm.h lj_strscan.h lj_lib.h
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
- lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
+ lj_bcdump.h lj_lib.h
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
- lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
- lj_traceerr.h lj_vm.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_vm.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
- lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
- lj_strscan.h lj_folddef.h
+ lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strscan.h
 lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_vm.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \
- lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
+lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
- lj_ffrecord.h lj_snap.h lj_vm.h
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
  lj_target_*.h lj_ctype.h lj_cdata.h
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h
+lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,21 +205,22 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_vm.h
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
- lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
- lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
- lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
- lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
- lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
- luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
- lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
- lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
- lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
- lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
+ lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
+ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
+ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+ lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
  lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
  lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
  lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
@@ -220,7 +237,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
 host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
- lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
+ host/buildvm_libbc.h
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
 host/minilua.o: host/minilua.c

+ 0 - 0
jni/LuaJIT-2.0.1/src/host/.gitignore → jni/LuaJIT-2.1/src/host/.gitignore


+ 0 - 0
jni/LuaJIT-2.0.1/src/host/README → jni/LuaJIT-2.1/src/host/README


+ 18 - 10
jni/LuaJIT-2.0.1/src/host/buildvm.c → jni/LuaJIT-2.1/src/host/buildvm.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** This is a tool to build the hand-tuned assembler code required for
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
 #include "../dynasm/dasm_x86.h"
 #elif LJ_TARGET_ARM
 #include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "../dynasm/dasm_mips.h"
 #else
@@ -108,10 +108,16 @@ static const char *sym_decorate(BuildCtx *ctx,
   sprintf(name, "%s%s%s", symprefix, prefix, suffix);
   p = strchr(name, '@');
   if (p) {
+#if LJ_TARGET_X86ORX64
     if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
       name[0] = '@';
     else
       *p = '\0';
+#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
+    /* Keep @plt etc. */
+#else
+    *p = '\0';
+#endif
   }
   p = (char *)malloc(strlen(name)+1);  /* MSVC doesn't like strdup. */
   strcpy(p, name);
@@ -173,6 +179,7 @@ static int build_code(BuildCtx *ctx)
   ctx->nreloc = 0;
 
   ctx->globnames = globnames;
+  ctx->extnames = extnames;
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->nrelocsym = 0;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -314,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
   char buf[80];
   int i;
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
-  fprintf(ctx->fp, "module(...)\n\n");
+  fprintf(ctx->fp, "return {\n\n");
 
   fprintf(ctx->fp, "bcnames = \"");
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
   fprintf(ctx->fp, "irnames = \"");
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
   fprintf(ctx->fp, "irfpm = { [0]=");
   for (i = 0; irfpm_names[i]; i++)
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
   fprintf(ctx->fp, "irfield = { [0]=");
   for (i = 0; irfield_names[i]; i++) {
@@ -337,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
     if (p) *p = '.';
     fprintf(ctx->fp, "\"%s\", ", buf);
   }
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
   fprintf(ctx->fp, "ircall = {\n[0]=");
   for (i = 0; ircall_names[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   for (i = 0; trace_errors[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 }
 
 /* -- Argument parsing ---------------------------------------------------- */
@@ -484,6 +491,7 @@ int main(int argc, char **argv)
   case BUILD_vmdef:
     emit_vmdef(ctx);
     emit_lib(ctx);
+    fprintf(ctx->fp, "}\n\n");
     break;
   case BUILD_ffdef:
   case BUILD_libdef:

+ 2 - 1
jni/LuaJIT-2.0.1/src/host/buildvm.h → jni/LuaJIT-2.1/src/host/buildvm.h

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _BUILDVM_H
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
   const char *beginsym;
   /* Strings generated by DynASM. */
   const char *const *globnames;
+  const char *const *extnames;
   const char *dasm_ident;
   const char *dasm_arch;
   /* Relocations. */

+ 44 - 12
jni/LuaJIT-2.0.1/src/host/buildvm_asm.c → jni/LuaJIT-2.1/src/host/buildvm_asm.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: Assembler source code emitter.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
 };
 
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
 				const char *sym)
 {
   const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
     exit(1);
   }
   emit_asm_bytes(ctx, cp, n);
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    if (ctx->mode == BUILD_elfasm) {
+      if (LJ_32)
+	fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+      fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+      if (LJ_32)
+	fprintf(ctx->fp, "#endif\n");
+      return;
+    } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+      fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+      return;
+    }
+  }
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
 }
 #else
@@ -100,14 +114,23 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\tblx %s\n", sym);
   } else if ((ins & 0x0e000000u) == 0x0a000000u) {
     fprintf(ctx->fp, "\t%s%.2s %s\n", (ins & 0x01000000u) ? "bl" : "b",
-	    "eqnecsccmiplvsvchilsgeltgtle" + 2*(ins >> 28), sym);
+	    &"eqnecsccmiplvsvchilsgeltgtle"[2*(ins >> 28)], sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_ARM64
+  if ((ins >> 26) == 0x25u) {
+    fprintf(ctx->fp, "\tbl %s\n", sym);
   } else {
     fprintf(stderr,
 	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
 	    ins, sym);
     exit(1);
   }
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_PPC
 #if LJ_TARGET_PS3
 #define TOCPREFIX "."
 #else
@@ -117,6 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
   } else if ((ins >> 26) == 18) {
+#if LJ_ARCH_PPC64
+    const char *suffix = strchr(sym, '@');
+    if (suffix && suffix[1] == 'h') {
+      fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+    } else if (suffix && suffix[1] == 'l') {
+      fprintf(ctx->fp, "\tld 12, %s\n", sym);
+    } else
+#endif
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
   } else {
     fprintf(stderr,
@@ -214,6 +245,9 @@ void emit_asm(BuildCtx *ctx)
   int i, rel;
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC64
+  fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
   fprintf(ctx->fp, "\t.text\n");
   emit_asm_align(ctx, 4);
 
@@ -254,8 +288,9 @@ void emit_asm(BuildCtx *ctx)
       BuildReloc *r = &ctx->reloc[rel];
       int n = r->ofs - ofs;
 #if LJ_TARGET_X86ORX64
-      if (ctx->mode == BUILD_machasm && r->type != 0) {
-	emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      if (r->type != 0 &&
+	  (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+	emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
       } else {
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -286,13 +321,10 @@ void emit_asm(BuildCtx *ctx)
   fprintf(ctx->fp, "\n");
   switch (ctx->mode) {
   case BUILD_elfasm:
-#if !LJ_TARGET_PS3
+#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
 #endif
-#if LJ_TARGET_PPCSPE
-    /* Soft-float ABI + SPE. */
-    fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3
     /* Hard-float ABI. */
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
 #endif

+ 1 - 1
jni/LuaJIT-2.0.1/src/host/buildvm_fold.c → jni/LuaJIT-2.1/src/host/buildvm_fold.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: IR folding hash table generator.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"

+ 61 - 2
jni/LuaJIT-2.0.1/src/host/buildvm_lib.c → jni/LuaJIT-2.1/src/host/buildvm_lib.c

@@ -1,11 +1,13 @@
 /*
 ** LuaJIT VM builder: library definition compiler.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
 #include "lj_obj.h"
+#include "lj_bc.h"
 #include "lj_lib.h"
+#include "buildvm_libbc.h"
 
 /* Context for library definitions. */
 static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
   regfunc = REGFUNC_OK;
 }
 
+static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
+{
+  uint32_t v = *p++;
+  if (v >= 0x80) {
+    int sh = 0; v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+  }
+  *vv = v;
+  return p;
+}
+
+static void libdef_fixupbc(uint8_t *p)
+{
+  uint32_t i, sizebc;
+  p += 4;
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  for (i = 0; i < sizebc; i++, p += 4) {
+    uint8_t op = p[libbc_endian ? 3 : 0];
+    uint8_t ra = p[libbc_endian ? 2 : 1];
+    uint8_t rc = p[libbc_endian ? 1 : 2];
+    uint8_t rb = p[libbc_endian ? 0 : 3];
+    if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
+      op = BC_ISNUM; rc++;
+    }
+    p[LJ_ENDIAN_SELECT(0, 3)] = op;
+    p[LJ_ENDIAN_SELECT(1, 2)] = ra;
+    p[LJ_ENDIAN_SELECT(2, 1)] = rc;
+    p[LJ_ENDIAN_SELECT(3, 0)] = rb;
+  }
+}
+
+static void libdef_lua(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    int i;
+    for (i = 0; libbc_map[i].name != NULL; i++) {
+      if (!strcmp(libbc_map[i].name, p)) {
+	int ofs = libbc_map[i].ofs;
+	int len = libbc_map[i+1].ofs - ofs;
+	obuf[2]++;  /* Bump hash table size. */
+	*optr++ = LIBINIT_LUA;
+	libdef_name(p, 0);
+	memcpy(optr, libbc_code + ofs, len);
+	libdef_fixupbc(optr);
+	optr += len;
+	return;
+      }
+    }
+    fprintf(stderr, "Error: missing libbc definition for %s\n", p);
+    exit(1);
+  }
+}
+
 static uint32_t find_rec(char *name)
 {
   char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
+  { "LUA(",	")",		libdef_lua,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "SET(",	")",		libdef_set,		0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       ffasmfunc);
   } else if (ctx->mode == BUILD_vmdef) {
-    fprintf(ctx->fp, "}\n\n");
+    fprintf(ctx->fp, "},\n\n");
   } else if (ctx->mode == BUILD_bcdef) {
     int i;
     fprintf(ctx->fp, "\n};\n\n");

+ 45 - 0
jni/LuaJIT-2.1/src/host/buildvm_libbc.h

@@ -0,0 +1,45 @@
+/* This is a generated file. DO NOT EDIT! */
+
+static const int libbc_endian = 0;
+
+static const uint8_t libbc_code[] = {
+#if LJ_FR2
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#else
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
+0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#endif
+};
+
+static const struct { const char *name; int ofs; } libbc_map[] = {
+{"math_deg",0},
+{"math_rad",25},
+{"string_len",50},
+{"table_foreachi",69},
+{"table_foreach",136},
+{"table_getn",207},
+{"table_remove",226},
+{NULL,355}
+};
+

+ 1 - 1
jni/LuaJIT-2.0.1/src/host/buildvm_peobj.c → jni/LuaJIT-2.1/src/host/buildvm_peobj.c

@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: PE object emitter.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Only used for building on Windows, since we cannot assume the presence
 ** of a suitable assembler. The host and target byte order must match.

+ 197 - 0
jni/LuaJIT-2.1/src/host/genlibbc.lua

@@ -0,0 +1,197 @@
+----------------------------------------------------------------------------
+-- Lua script to dump the bytecode of the library functions written in Lua.
+-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local ffi = require("ffi")
+local bit = require("bit")
+local vmdef = require("jit.vmdef")
+local bcnames = vmdef.bcnames
+
+local format = string.format
+
+local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
+
+local function usage(arg)
+  io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
+		  " [-o buildvm_libbc.h] lib_*.c\n")
+  os.exit(1)
+end
+
+local function parse_arg(arg)
+  local outfile = "-"
+  if not (arg and arg[1]) then
+    usage(arg)
+  end
+  if arg[1] == "-o" then
+    outfile = arg[2]
+    if not outfile then usage(arg) end
+    table.remove(arg, 1)
+    table.remove(arg, 1)
+  end
+  return outfile
+end
+
+local function read_files(names)
+  local src = ""
+  for _,name in ipairs(names) do
+    local fp = assert(io.open(name))
+    src = src .. fp:read("*a")
+    fp:close()
+  end
+  return src
+end
+
+local function transform_lua(code)
+  local fixup = {}
+  local n = -30000
+  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
+    n = n + 1
+    fixup[n] = { "CHECK", tp }
+    return format("%s=%d", var, n)
+  end)
+  code = string.gsub(code, "PAIRS%((.-)%)", function(var)
+    fixup.PAIRS = true
+    return format("nil, %s, 0", var)
+  end)
+  return "return "..code, fixup
+end
+
+local function read_uleb128(p)
+  local v = p[0]; p = p + 1
+  if v >= 128 then
+    local sh = 7; v = v - 128
+    repeat
+      local r = p[0]
+      v = v + bit.lshift(bit.band(r, 127), sh)
+      sh = sh + 7
+      p = p + 1
+    until r < 128
+  end
+  return p, v
+end
+
+-- ORDER LJ_T
+local name2itype = {
+  str = 5, func = 9, tab = 12, int = 14, num = 15
+}
+
+local BC = {}
+for i=0,#bcnames/6-1 do
+  BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
+end
+local xop, xra = isbe and 3 or 0, isbe and 2 or 1
+local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
+
+local function fixup_dump(dump, fixup)
+  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
+  local p = buf+5
+  local n, sizebc
+  p, n = read_uleb128(p)
+  local start = p
+  p = p + 4
+  p = read_uleb128(p)
+  p = read_uleb128(p)
+  p, sizebc = read_uleb128(p)
+  local rawtab = {}
+  for i=0,sizebc-1 do
+    local op = p[xop]
+    if op == BC.KSHORT then
+      local rd = p[xrc] + 256*p[xrb]
+      rd = bit.arshift(bit.lshift(rd, 16), 16)
+      local f = fixup[rd]
+      if f then
+	if f[1] == "CHECK" then
+	  local tp = f[2]
+	  if tp == "tab" then rawtab[p[xra]] = true end
+	  p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
+	  p[xrb] = 0
+	  p[xrc] = name2itype[tp]
+	else
+	  error("unhandled fixup type: "..f[1])
+	end
+      end
+    elseif op == BC.TGETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TGETR
+      end
+    elseif op == BC.TSETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TSETR
+      end
+    elseif op == BC.ITERC then
+      if fixup.PAIRS then
+	p[xop] = BC.ITERN
+      end
+    end
+    p = p + 4
+  end
+  return ffi.string(start, n)
+end
+
+local function find_defs(src)
+  local defs = {}
+  for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
+    local env = {}
+    local tcode, fixup = transform_lua(code)
+    local func = assert(load(tcode, "", nil, env))()
+    defs[name] = fixup_dump(string.dump(func, true), fixup)
+    defs[#defs+1] = name
+  end
+  return defs
+end
+
+local function gen_header(defs)
+  local t = {}
+  local function w(x) t[#t+1] = x end
+  w("/* This is a generated file. DO NOT EDIT! */\n\n")
+  w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
+  local s = ""
+  for _,name in ipairs(defs) do
+    s = s .. defs[name]
+  end
+  w("static const uint8_t libbc_code[] = {\n")
+  local n = 0
+  for i=1,#s do
+    local x = string.byte(s, i)
+    w(x); w(",")
+    n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
+    if n >= 75 then n = 0; w("\n") end
+  end
+  w("0\n};\n\n")
+  w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
+  local m = 0
+  for _,name in ipairs(defs) do
+    w('{"'); w(name); w('",'); w(m) w('},\n')
+    m = m + #defs[name]
+  end
+  w("{NULL,"); w(m); w("}\n};\n\n")
+  return table.concat(t)
+end
+
+local function write_file(name, data)
+  if name == "-" then
+    assert(io.write(data))
+    assert(io.flush())
+  else
+    local fp = io.open(name)
+    if fp then
+      local old = fp:read("*a")
+      fp:close()
+      if data == old then return end
+    end
+    fp = assert(io.open(name, "w"))
+    assert(fp:write(data))
+    assert(fp:close())
+  end
+end
+
+local outfile = parse_arg(arg)
+local src = read_files(arg)
+local defs = find_defs(src)
+local hdr = gen_header(defs)
+write_file(outfile, hdr)
+

+ 2 - 1
jni/LuaJIT-2.0.1/src/host/genminilua.lua → jni/LuaJIT-2.1/src/host/genminilua.lua

@@ -2,7 +2,7 @@
 -- Lua script to generate a customized, minified version of Lua.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 
@@ -210,6 +210,7 @@ typedef unsigned __int64 U64;
 #else
 typedef unsigned long long U64;
 #endif
+int _CRT_glob = 0;
 ]]}, {}
 
 local function preprocess(src)

+ 1 - 0
jni/LuaJIT-2.0.1/src/host/minilua.c → jni/LuaJIT-2.1/src/host/minilua.c

@@ -27,6 +27,7 @@ typedef unsigned __int64 U64;
 #else
 typedef unsigned long long U64;
 #endif
+int _CRT_glob = 0;
 #include <stddef.h>
 #include <stdarg.h>
 #include <limits.h>

+ 0 - 0
jni/LuaJIT-2.0.1/src/jit/.gitignore → jni/LuaJIT-2.1/src/jit/.gitignore


+ 10 - 11
jni/LuaJIT-2.0.1/src/jit/bc.lua → jni/LuaJIT-2.1/src/jit/bc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT bytecode listing module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -41,7 +41,7 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
 end
 
 -- Public module functions.
-module(...)
-
-line = bcline
-dump = bcdump
-targets = bctargets
-
-on = bcliston
-off = bclistoff
-start = bcliston -- For -j command line option.
+return {
+  line = bcline,
+  dump = bcdump,
+  targets = bctargets,
+  on = bcliston,
+  off = bclistoff,
+  start = bcliston -- For -j command line option.
+}
 

+ 16 - 14
jni/LuaJIT-2.0.1/src/jit/bcsave.lua → jni/LuaJIT-2.1/src/jit/bcsave.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT module to save/list bytecode.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -11,7 +11,7 @@
 ------------------------------------------------------------------------------
 
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local bit = require("bit")
 
 -- Symbol name prefix for LuaJIT bytecode.
@@ -63,13 +63,13 @@ local map_type = {
 }
 
 local map_arch = {
-  x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
+  x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
   mips = true, mipsel = true,
 }
 
 local map_os = {
   linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
-  openbsd = true, solaris = true,
+  openbsd = true, dragonfly = true, solaris = true,
 }
 
 local function checkarg(str, map, err)
@@ -200,9 +200,9 @@ typedef struct {
 ]]
   local symname = LJBC_PREFIX..ctx.modname
   local is64, isbe = false, false
-  if ctx.arch == "x64" then
+  if ctx.arch == "x64" or ctx.arch == "arm64" then
     is64 = true
-  elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+  elseif ctx.arch == "ppc" or ctx.arch == "mips" then
     isbe = true
   end
 
@@ -237,7 +237,7 @@ typedef struct {
   hdr.eendian = isbe and 2 or 1
   hdr.eversion = 1
   hdr.type = f16(1)
-  hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
     hdr.flags = 0x50001006
   end
@@ -477,13 +477,13 @@ typedef struct {
 } mach_obj_64;
 typedef struct {
   mach_fat_header fat;
-  mach_fat_arch fat_arch[4];
+  mach_fat_arch fat_arch[2];
   struct {
     mach_header hdr;
     mach_segment_command seg;
     mach_section sec;
     mach_symtab_command sym;
-  } arch[4];
+  } arch[2];
   mach_nlist sym_entry;
   uint8_t space[4096];
 } mach_fat_obj;
@@ -494,6 +494,8 @@ typedef struct {
     is64, align, mobj = true, 8, "mach_obj_64"
   elseif ctx.arch == "arm" then
     isfat, mobj = true, "mach_fat_obj"
+  elseif ctx.arch == "arm64" then
+    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
   else
     check(ctx.arch == "x86", "unsupported architecture for OSX")
   end
@@ -503,8 +505,8 @@ typedef struct {
   -- Create Mach-O object and fill in header.
   local o = ffi.new(mobj)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
-  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
-  local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
+  local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
   if isfat then
     o.fat.magic = be32(0xcafebabe)
     o.fat.nfat_arch = be32(#cpusubtype)
@@ -653,7 +655,7 @@ end
 ------------------------------------------------------------------------------
 
 -- Public module functions.
-module(...)
-
-start = docmd -- Process -b command line option.
+return {
+  start = docmd -- Process -b command line option.
+}
 

+ 10 - 10
jni/LuaJIT-2.0.1/src/jit/dis_arm.lua → jni/LuaJIT-2.1/src/jit/dis_arm.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT ARM disassembler module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 16 then return map_gpr[r] end
   return "d"..(r-16)
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 

+ 16 - 16
jni/LuaJIT-2.0.1/src/jit/dis_mips.lua → jni/LuaJIT-2.1/src/jit/dis_mips.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS disassembler module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
   return ctx
 end
 
-local function create_el_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create_el(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.get = get_le
   return ctx
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
-local function disass_el_(code, addr, out)
-  create_el_(code, addr, out):disass()
+local function disass_el(code, addr, out)
+  create_el(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-create_el = create_el_
-disass = disass_
-disass_el = disass_el_
-regname = regname_
+return {
+  create = create,
+  create_el = create_el,
+  disass = disass,
+  disass_el = disass_el,
+  regname = regname
+}
 

+ 7 - 10
jni/LuaJIT-2.0.1/src/jit/dis_mipsel.lua → jni/LuaJIT-2.1/src/jit/dis_mipsel.lua

@@ -1,20 +1,17 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPSEL disassembler wrapper module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the little-endian functions from the
 -- MIPS disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 
-local require = require
-
-module(...)
-
-local dis_mips = require(_PACKAGE.."dis_mips")
-
-create = dis_mips.create_el
-disass = dis_mips.disass_el
-regname = dis_mips.regname
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create_el,
+  disass = dis_mips.disass_el,
+  regname = dis_mips.regname
+}
 

+ 10 - 10
jni/LuaJIT-2.0.1/src/jit/dis_ppc.lua → jni/LuaJIT-2.1/src/jit/dis_ppc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT PPC disassembler module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 

+ 7 - 10
jni/LuaJIT-2.0.1/src/jit/dis_x64.lua → jni/LuaJIT-2.1/src/jit/dis_x64.lua

@@ -1,20 +1,17 @@
 ----------------------------------------------------------------------------
 -- LuaJIT x64 disassembler wrapper module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the 64 bit functions from the combined
 -- x86/x64 disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 
-local require = require
-
-module(...)
-
-local dis_x86 = require(_PACKAGE.."dis_x86")
-
-create = dis_x86.create64
-disass = dis_x86.disass64
-regname = dis_x86.regname64
+local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
+return {
+  create = dis_x86.create64,
+  disass = dis_x86.disass64,
+  regname = dis_x86.regname64
+}
 

+ 176 - 89
jni/LuaJIT-2.0.1/src/jit/dis_x86.lua → jni/LuaJIT-2.1/src/jit/dis_x86.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT x86/x64 disassembler module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -15,19 +15,20 @@
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
--- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
--- instructions.
+-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
+-- (VMX/SVM) instructions.
 --
 -- Notes:
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
--- * The public API may change when more architectures are added.
 ------------------------------------------------------------------------------
 
 local type = type
 local sub, byte, format = string.sub, string.byte, string.format
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local tohex = bit.tohex
 
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 --Cx
-"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 --Dx
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
-  [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+  [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
 }, { __index = map_opc1_32 })
 
@@ -112,12 +113,12 @@ local map_opc2 = {
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 --1x
-"movupsXrm|movssXrm|movupdXrm|movsdXrm",
-"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
+"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movlpsXmr||movlpdXmr",
-"unpcklpsXrm||unpcklpdXrm",
-"unpckhpsXrm||unpckhpdXrm",
+"unpcklpsXrvm||unpcklpdXrvm",
+"unpckhpsXrvm||unpckhpdXrvm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movhpsXmr||movhpdXmr",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movapsXrm||movapdXrm",
 "movapsXmr||movapdXmr",
-"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 --5x
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
-"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
-"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
-"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
-"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
-"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
+"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
+"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
+"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
+"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
-"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
-"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
+"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
 --6x
-"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
-"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
-"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
-"||punpcklqdqXrm","||punpckhqdqXrm",
+"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
+"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
+"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
+"||punpcklqdqXrvm","||punpckhqdqXrvm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 --7x
 "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
 "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
-"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 nil,nil,
-"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 --8x
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 --Cx
 "xaddBmr","xaddVmr",
-"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
-"pinsrwPrWmu","pextrwDrPmu",
-"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
+"pinsrwPrvWmu","pextrwDrPmu",
+"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 --Dx
-"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
-"paddqPrm","pmullwPrm",
+"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
+"paddqPrvm","pmullwPrvm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
-"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
-"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
+"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
 --Ex
-"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
-"pmulhuwPrm","pmulhwPrm",
+"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
+"pmulhuwPrvm","pmulhwPrvm",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
-"psubsbPrm","psubswPrm","pminswPrm","porPrm",
-"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
+"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
 --Fx
-"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
-"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
-"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
-"paddbPrm","paddwPrm","padddPrm","ud",
+"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
+"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
+"paddbPrvm","paddwPrvm","padddPrvm","ud",
 }
 assert(map_opc2[255] == "ud")
 
@@ -208,46 +209,62 @@ assert(map_opc2[255] == "ud")
 local map_opc3 = {
 ["38"] = { -- [66] 0f 38 xx
 --0x
-[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
-"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
-"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
-nil,nil,nil,nil,
+[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
+"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
+"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
+"||permilpsXrvm","||permilpdXrvm",nil,nil,
 --1x
 "||pblendvbXrma",nil,nil,nil,
-"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
-nil,nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
+"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 --2x
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
-"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
-nil,nil,nil,nil,
+"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
+"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
 --3x
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
-"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
-"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
-"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
+"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
+"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
 --4x
-"||pmulddXrm","||phminposuwXrm",
+"||pmulddXrvm","||phminposuwXrm",nil,nil,
+nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+--5x
+[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
+[0x5a] = "||broadcasti128XrlXm",
+--7x
+[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
+--8x
+[0x8c] = "||pmaskmovXrvVSm",
+[0x8e] = "||pmaskmovVSmXvr",
 --Fx
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
 },
 
 ["3a"] = { -- [66] 0f 3a xx
 --0x
-[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
-"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
-"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
+"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
+"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
 --1x
 nil,nil,nil,nil,
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
-nil,nil,nil,nil,nil,nil,nil,nil,
+"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
+nil,nil,nil,nil,
 --2x
-"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
+--3x
+[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
 --4x
-[0x40] = "||dppsXrmu",
-[0x41] = "||dppdXrmu",
-[0x42] = "||mpsadbwXrmu",
+[0x40] = "||dppsXrvmu",
+[0x41] = "||dppdXrvmu",
+[0x42] = "||mpsadbwXrvmu",
+[0x46] = "||perm2i128Xrvmu",
+[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
+[0x4c] = "||pblendvbXrvmb",
 --6x
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
@@ -354,17 +371,19 @@ local map_regs = {
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+  Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+	"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
 }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 
 -- Maps for size names.
 local map_sz2n = {
-  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
 }
 local map_sz2prefix = {
   B = "byte", W = "word", D = "dword",
   Q = "qword",
-  M = "qword", X = "xword",
+  M = "qword", X = "xword", Y = "yword",
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
 }
 
@@ -387,10 +406,13 @@ local function putop(ctx, text, operands)
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rex then
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
-	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
-    if t ~= "" then text = "rex."..t.." "..text end
+	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
+	      (ctx.vexl and "l" or "")
+    if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
+    if t ~= "" then text = ctx.rex.."."..t.." "..text
+    elseif ctx.rex == "vex" then text = "v"..text end
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-    ctx.rex = false
+    ctx.rex = false; ctx.vexl = false; ctx.vexv = false
   end
   if ctx.seg then
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +427,7 @@ local function putop(ctx, text, operands)
   end
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.mrm = false
+  ctx.vexv = false
   ctx.start = pos
   ctx.imm = nil
 end
@@ -413,7 +436,7 @@ end
 local function clearprefixes(ctx)
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-  ctx.rex = false; ctx.a32 = false
+  ctx.rex = false; ctx.a32 = false; ctx.vexl = false
 end
 
 -- Fallback for incomplete opcodes at the end.
@@ -450,9 +473,9 @@ end
 -- Process pattern string and generate the operands.
 local function putpat(ctx, name, pat)
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
-  local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+  local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
 
-  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+  -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
   for p in gmatch(pat, ".") do
     local x = nil
     if p == "V" or p == "U" then
@@ -467,11 +490,13 @@ local function putpat(ctx, name, pat)
     elseif p == "B" then
       sz = "B"
       regs = ctx.rex and map_regs.B64 or map_regs.B
-    elseif match(p, "[WDQMXFG]") then
+    elseif match(p, "[WDQMXYFG]") then
       sz = p
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
     elseif p == "P" then
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
     elseif p == "S" then
       name = name..lower(sz)
@@ -484,6 +509,10 @@ local function putpat(ctx, name, pat)
       local imm = getimm(ctx, pos, 1); if not imm then return end
       x = format("0x%02x", imm)
       pos = pos+1
+    elseif p == "b" then
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = regs[imm/16+1]
+      pos = pos+1
     elseif p == "w" then
       local imm = getimm(ctx, pos, 2); if not imm then return end
       x = format("0x%x", imm)
@@ -532,7 +561,7 @@ local function putpat(ctx, name, pat)
 	local lo = imm % 0x1000000
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
       else
-	x = format("0x%08x", imm)
+	x = "0x"..tohex(imm)
       end
     elseif p == "R" then
       local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +645,13 @@ local function putpat(ctx, name, pat)
 	else
 	  x = "CR"..sp
 	end
+      elseif p == "v" then
+	if ctx.vexv then
+	  x = regs[ctx.vexv+1]; ctx.vexv = false
+	end
       elseif p == "y" then x = "DR"..sp
       elseif p == "z" then x = "TR"..sp
+      elseif p == "l" then vexl = false
       elseif p == "t" then
       else
 	error("bad pattern `"..pat.."'")
@@ -692,7 +726,7 @@ map_act = {
   B = putpat, W = putpat, D = putpat, Q = putpat,
   V = putpat, U = putpat, T = putpat,
   M = putpat, X = putpat, P = putpat,
-  F = putpat, G = putpat,
+  F = putpat, G = putpat, Y = putpat,
 
   -- Collect prefixes.
   [":"] = function(ctx, name, pat)
@@ -753,15 +787,68 @@ map_act = {
 
   -- REX prefix.
   rex = function(ctx, name, pat)
-    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
-    ctx.rex = true
+    ctx.rex = "rex"
+  end,
+
+  -- VEX prefix.
+  vex = function(ctx, name, pat)
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
+    ctx.rex = "vex"
+    local pos = ctx.pos
+    if ctx.mrm then
+      ctx.mrm = nil
+      pos = pos-1
+    end
+    local b = byte(ctx.code, pos, pos)
+    if not b then return incomplete(ctx) end
+    pos = pos+1
+    if b < 128 then ctx.rexr = true end
+    local m = 1
+    if pat == "3" then
+      m = b%32; b = (b-m)/32
+      local nb = b%2; b = (b-nb)/2
+      if nb == 0 then ctx.rexb = true end
+      local nx = b%2; b = (b-nx)/2
+      if nx == 0 then ctx.rexx = true end
+      b = byte(ctx.code, pos, pos)
+      if not b then return incomplete(ctx) end
+      pos = pos+1
+      if b >= 128 then ctx.rexw = true end
+    end
+    ctx.pos = pos
+    local map
+    if m == 1 then map = map_opc2
+    elseif m == 2 then map = map_opc3["38"]
+    elseif m == 3 then map = map_opc3["3a"]
+    else return unknown(ctx) end
+    local p = b%4; b = (b-p)/4
+    if p == 1 then ctx.o16 = "o16"
+    elseif p == 2 then ctx.rep = "rep"
+    elseif p == 3 then ctx.rep = "repne" end
+    local l = b%2; b = (b-l)/2
+    if l ~= 0 then ctx.vexl = true end
+    ctx.vexv = (-1-b)%16
+    return dispatchmap(ctx, map)
   end,
 
   -- Special case for nop with REX prefix.
   nop = function(ctx, name, pat)
     return dispatch(ctx, ctx.rex and pat or "nop")
   end,
+
+  -- Special case for 0F 77.
+  emms = function(ctx, name, pat)
+    if ctx.rex ~= "vex" then
+      return putop(ctx, "emms")
+    elseif ctx.vexl then
+      ctx.vexl = false
+      return putop(ctx, "zeroall")
+    else
+      return putop(ctx, "zeroupper")
+    end
+  end,
 }
 
 ------------------------------------------------------------------------------
@@ -782,7 +869,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = (addr or 0) - 1
@@ -796,8 +883,8 @@ local function create_(code, addr, out)
   return ctx
 end
 
-local function create64_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create64(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.x64 = true
   ctx.map1 = map_opc1_64
   ctx.aregs = map_regs.Q
@@ -805,32 +892,32 @@ local function create64_(code, addr, out)
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
-local function disass64_(code, addr, out)
-  create64_(code, addr, out):disass()
+local function disass64(code, addr, out)
+  create64(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 8 then return map_regs.D[r+1] end
   return map_regs.X[r-7]
 end
 
-local function regname64_(r)
+local function regname64(r)
   if r < 16 then return map_regs.Q[r+1] end
   return map_regs.X[r-15]
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-create64 = create64_
-disass = disass_
-disass64 = disass64_
-regname = regname_
-regname64 = regname64_
+return {
+  create = create,
+  create64 = create64,
+  disass = disass,
+  disass64 = disass64,
+  regname = regname,
+  regname64 = regname64
+}
 

+ 21 - 14
jni/LuaJIT-2.0.1/src/jit/dump.lua → jni/LuaJIT-2.1/src/jit/dump.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT compiler dump module.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -36,6 +36,7 @@
 --  * m  Dump the generated machine code.
 --    x  Print each taken trace exit.
 --    X  Print each taken trace exit and the contents of all registers.
+--    a  Print the IR of aborted traces, too.
 --
 -- The output format can be set with the following characters:
 --
@@ -54,7 +55,7 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -62,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local bit = require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
 local sub, gsub, format = string.sub, string.gsub, string.format
 local byte, char, rep = string.byte, string.char, string.rep
 local type, tostring = type, tostring
@@ -90,6 +91,7 @@ local function fillsymtab_tr(tr, nexit)
   end
   for i=0,nexit-1 do
     local addr = traceexitstub(tr, i)
+    if addr < 0 then addr = addr + 2^32 end
     t[addr] = tostring(i)
   end
   local addr = traceexitstub(tr, nexit)
@@ -103,7 +105,10 @@ local function fillsymtab(tr, nexit)
     local ircall = vmdef.ircall
     for i=0,#ircall do
       local addr = ircalladdr(i)
-      if addr ~= 0 then t[addr] = ircall[i] end
+      if addr ~= 0 then
+	if addr < 0 then addr = addr + 2^32 end
+	t[addr] = ircall[i]
+      end
     end
   end
   if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -117,6 +122,7 @@ local function fillsymtab(tr, nexit)
 	nexit = 1000000
 	break
       end
+      if addr < 0 then addr = addr + 2^32 end
       t[addr] = tostring(i)
     end
     nexitsym = nexit
@@ -135,6 +141,7 @@ local function dump_mcode(tr)
   local mcode, addr, loop = tracemc(tr)
   if not mcode then return end
   if not disass then disass = require("jit.dis_"..jit.arch) end
+  if addr < 0 then addr = addr + 2^32 end
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   local ctx = disass.create(mcode, addr, dumpwrite)
   ctx.hexdump = 0
@@ -269,8 +276,7 @@ local litname = {
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
-    if band(mode, 0x400) ~= 0 then s = s.." trunc"
-    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    if band(mode, 0x800) ~= 0 then s = s.." sext" end
     local c = shr(mode, 14)
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     t[mode] = s
@@ -279,6 +285,8 @@ local litname = {
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
+  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 
 local function ctlsub(c)
@@ -546,10 +554,8 @@ local function dump_trace(what, tr, func, pc, otr, oex)
     out:write("---- TRACE ", tr, " ", what)
     if otr then out:write(" ", otr, "/", oex) end
     out:write(" ", fmtfunc(func, pc), "\n")
-    recprefix = ""
   elseif what == "stop" or what == "abort" then
     out:write("---- TRACE ", tr, " ", what)
-    recprefix = nil
     if what == "abort" then
       out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
     else
@@ -565,6 +571,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
     end
     if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
   else
+    if what == "flush" then symtab, nexitsym = {}, 0 end
     out:write("---- TRACE ", what, "\n\n")
   end
   out:flush()
@@ -608,7 +615,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
       end
     else
       for i=1,ngpr do
-	out:write(format(" %08x", regs[i]))
+	out:write(" ", tohex(regs[i]))
 	if i % 8 == 0 then out:write("\n") end
       end
     end
@@ -692,9 +699,9 @@ local function dumpon(opt, outfile)
 end
 
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 

+ 310 - 0
jni/LuaJIT-2.1/src/jit/p.lua

@@ -0,0 +1,310 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module is a simple command line interface to the built-in
+-- low-overhead profiler of LuaJIT.
+--
+-- The lower-level API of the profiler is accessible via the "jit.profile"
+-- module or the luaJIT_profile_* C API.
+--
+-- Example usage:
+--
+--   luajit -jp myapp.lua
+--   luajit -jp=s myapp.lua
+--   luajit -jp=-s myapp.lua
+--   luajit -jp=vl myapp.lua
+--   luajit -jp=G,profile.txt myapp.lua
+--
+-- The following dump features are available:
+--
+--   f  Stack dump: function name, otherwise module:line. Default mode.
+--   F  Stack dump: ditto, but always prepend module.
+--   l  Stack dump: module:line.
+--   <number> stack dump depth (callee < caller). Default: 1.
+--   -<number> Inverse stack dump depth (caller > callee).
+--   s  Split stack dump after first stack level. Implies abs(depth) >= 2.
+--   p  Show full path for module names.
+--   v  Show VM states. Can be combined with stack dumps, e.g. vf or fv.
+--   z  Show zones. Can be combined with stack dumps, e.g. zf or fz.
+--   r  Show raw sample counts. Default: show percentages.
+--   a  Annotate excerpts from source code files.
+--   A  Annotate complete source code files.
+--   G  Produce raw output suitable for graphical tools (e.g. flame graphs).
+--   m<number> Minimum sample percentage to be shown. Default: 3.
+--   i<number> Sampling interval in milliseconds. Default: 10.
+--
+----------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+local profile = require("jit.profile")
+local vmdef = require("jit.vmdef")
+local math = math
+local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
+local sort, format = table.sort, string.format
+local stdout = io.stdout
+local zone -- Load jit.zone module on demand.
+
+-- Output file handle.
+local out
+
+------------------------------------------------------------------------------
+
+local prof_ud
+local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
+local prof_ann, prof_count1, prof_count2, prof_samples
+
+local map_vmmode = {
+  N = "Compiled",
+  I = "Interpreted",
+  C = "C code",
+  G = "Garbage Collector",
+  J = "JIT Compiler",
+}
+
+-- Profiler callback.
+local function prof_cb(th, samples, vmmode)
+  prof_samples = prof_samples + samples
+  local key_stack, key_stack2, key_state
+  -- Collect keys for sample.
+  if prof_states then
+    if prof_states == "v" then
+      key_state = map_vmmode[vmmode] or vmmode
+    else
+      key_state = zone:get() or "(none)"
+    end
+  end
+  if prof_fmt then
+    key_stack = profile.dumpstack(th, prof_fmt, prof_depth)
+    key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x)
+      return vmdef.ffnames[tonumber(x)]
+    end)
+    if prof_split == 2 then
+      local k1, k2 = key_stack:match("(.-) [<>] (.*)")
+      if k2 then key_stack, key_stack2 = k1, k2 end
+    elseif prof_split == 3 then
+      key_stack2 = profile.dumpstack(th, "l", 1)
+    end
+  end
+  -- Order keys.
+  local k1, k2
+  if prof_split == 1 then
+    if key_state then
+      k1 = key_state
+      if key_stack then k2 = key_stack end
+    end
+  elseif key_stack then
+    k1 = key_stack
+    if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end
+  end
+  -- Coalesce samples in one or two levels.
+  if k1 then
+    local t1 = prof_count1
+    t1[k1] = (t1[k1] or 0) + samples
+    if k2 then
+      local t2 = prof_count2
+      local t3 = t2[k1]
+      if not t3 then t3 = {}; t2[k1] = t3 end
+      t3[k2] = (t3[k2] or 0) + samples
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Show top N list.
+local function prof_top(count1, count2, samples, indent)
+  local t, n = {}, 0
+  for k, v in pairs(count1) do
+    n = n + 1
+    t[n] = k
+  end
+  sort(t, function(a, b) return count1[a] > count1[b] end)
+  for i=1,n do
+    local k = t[i]
+    local v = count1[k]
+    local pct = floor(v*100/samples + 0.5)
+    if pct < prof_min then break end
+    if not prof_raw then
+      out:write(format("%s%2d%%  %s\n", indent, pct, k))
+    elseif prof_raw == "r" then
+      out:write(format("%s%5d  %s\n", indent, v, k))
+    else
+      out:write(format("%s %d\n", k, v))
+    end
+    if count2 then
+      local r = count2[k]
+      if r then
+	prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and "  -- " or
+			    (prof_depth < 0 and "  -> " or "  <- "))
+      end
+    end
+  end
+end
+
+-- Annotate source code
+local function prof_annotate(count1, samples)
+  local files = {}
+  local ms = 0
+  for k, v in pairs(count1) do
+    local pct = floor(v*100/samples + 0.5)
+    ms = math.max(ms, v)
+    if pct >= prof_min then
+      local file, line = k:match("^(.*):(%d+)$")
+      local fl = files[file]
+      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
+      line = tonumber(line)
+      fl[line] = prof_raw and v or pct
+    end
+  end
+  sort(files)
+  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
+  if prof_raw then
+    local n = math.max(5, math.ceil(math.log10(ms)))
+    fmtv = "%"..n.."d | %s\n"
+    fmtn = (" "):rep(n).." | %s\n"
+  end
+  local ann = prof_ann
+  for _, file in ipairs(files) do
+    local f0 = file:byte()
+    if f0 == 40 or f0 == 91 then
+      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
+      break
+    end
+    local fp, err = io.open(file)
+    if not fp then
+      out:write(format("====== ERROR: %s: %s\n", file, err))
+      break
+    end
+    out:write(format("\n====== %s ======\n", file))
+    local fl = files[file]
+    local n, show = 1, false
+    if ann ~= 0 then
+      for i=1,ann do
+	if fl[i] then show = true; out:write("@@ 1 @@\n"); break end
+      end
+    end
+    for line in fp:lines() do
+      if line:byte() == 27 then
+	out:write("[Cannot annotate bytecode file]\n")
+	break
+      end
+      local v = fl[n]
+      if ann ~= 0 then
+	local v2 = fl[n+ann]
+	if show then
+	  if v2 then show = n+ann elseif v then show = n
+	  elseif show+ann < n then show = false end
+	elseif v2 then
+	  show = n+ann
+	  out:write(format("@@ %d @@\n", n))
+	end
+	if not show then goto next end
+      end
+      if v then
+	out:write(format(fmtv, v, line))
+      else
+	out:write(format(fmtn, line))
+      end
+    ::next::
+      n = n + 1
+    end
+    fp:close()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Finish profiling and dump result.
+local function prof_finish()
+  if prof_ud then
+    profile.stop()
+    local samples = prof_samples
+    if samples == 0 then
+      if prof_raw ~= true then out:write("[No samples collected]\n") end
+      return
+    end
+    if prof_ann then
+      prof_annotate(prof_count1, samples)
+    else
+      prof_top(prof_count1, prof_count2, samples, "")
+    end
+    prof_count1 = nil
+    prof_count2 = nil
+    prof_ud = nil
+  end
+end
+
+-- Start profiling.
+local function prof_start(mode)
+  local interval = ""
+  mode = mode:gsub("i%d*", function(s) interval = s; return "" end)
+  prof_min = 3
+  mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end)
+  prof_depth = 1
+  mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end)
+  local m = {}
+  for c in mode:gmatch(".") do m[c] = c end
+  prof_states = m.z or m.v
+  if prof_states == "z" then zone = require("jit.zone") end
+  local scope = m.l or m.f or m.F or (prof_states and "" or "f")
+  local flags = (m.p or "")
+  prof_raw = m.r
+  if m.s then
+    prof_split = 2
+    if prof_depth == -1 or m["-"] then prof_depth = -2
+    elseif prof_depth == 1 then prof_depth = 2 end
+  elseif mode:find("[fF].*l") then
+    scope = "l"
+    prof_split = 3
+  else
+    prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
+  end
+  prof_ann = m.A and 0 or (m.a and 3)
+  if prof_ann then
+    scope = "l"
+    prof_fmt = "pl"
+    prof_split = 0
+    prof_depth = 1
+  elseif m.G and scope ~= "" then
+    prof_fmt = flags..scope.."Z;"
+    prof_depth = -100
+    prof_raw = true
+    prof_min = 0
+  elseif scope == "" then
+    prof_fmt = false
+  else
+    local sc = prof_split == 3 and m.f or m.F or scope
+    prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
+  end
+  prof_count1 = {}
+  prof_count2 = {}
+  prof_samples = 0
+  profile.start(scope:lower()..interval, prof_cb)
+  prof_ud = newproxy(true)
+  getmetatable(prof_ud).__gc = prof_finish
+end
+
+------------------------------------------------------------------------------
+
+local function start(mode, outfile)
+  if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stdout
+  end
+  prof_start(mode or "f")
+end
+
+-- Public module functions.
+return {
+  start = start, -- For -j command line option.
+  stop = prof_finish
+}
+

+ 10 - 7
jni/LuaJIT-2.0.1/src/jit/v.lua → jni/LuaJIT-2.1/src/jit/v.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- Verbose mode of the LuaJIT compiler.
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -59,7 +59,7 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
       if ltype == "interpreter" then
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	  tr, startex, startloc))
+      elseif ltype == "stitch" then
+	out:write(format("[TRACE %3s %s%s %s %s]\n",
+	  tr, startex, startloc, ltype, fmtfunc(func, pc)))
       elseif link == tr or link == 0 then
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	  tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
 end
 
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 

+ 45 - 0
jni/LuaJIT-2.1/src/jit/zone.lua

@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler zones.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module implements a simple hierarchical zone model.
+--
+-- Example usage:
+--
+--   local zone = require("jit.zone")
+--   zone("AI")
+--   ...
+--     zone("A*")
+--     ...
+--     print(zone:get()) --> "A*"
+--     ...
+--     zone()
+--   ...
+--   print(zone:get()) --> "AI"
+--   ...
+--   zone()
+--
+----------------------------------------------------------------------------
+
+local remove = table.remove
+
+return setmetatable({
+  flush = function(t)
+    for i=#t,1,-1 do t[i] = nil end
+  end,
+  get = function(t)
+    return t[#t]
+  end
+}, {
+  __call = function(t, zone)
+    if zone then
+      t[#t+1] = zone
+    else
+      return (assert(remove(t), "empty zone stack"))
+    end
+  end
+})
+

+ 0 - 0
jni/LuaJIT-2.0.1/src/lauxlib.h → jni/LuaJIT-2.1/src/lauxlib.h


+ 2 - 2
jni/LuaJIT-2.0.1/src/lib_aux.c → jni/LuaJIT-2.1/src/lib_aux.c

@@ -1,6 +1,6 @@
 /*
 ** Auxiliary library for the Lua/C API.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -302,7 +302,7 @@ static int panic(lua_State *L)
 
 #ifdef LUAJIT_USE_SYSMALLOC
 
-#if LJ_64
+#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
 #error "Must use builtin allocator for 64 bit target"
 #endif
 

+ 34 - 53
jni/LuaJIT-2.0.1/src/lib_base.c → jni/LuaJIT-2.1/src/lib_base.c

@@ -1,6 +1,6 @@
 /*
 ** Base and coroutine library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -32,6 +32,7 @@
 #include "lj_dispatch.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 
 /* -- Base library: checks ------------------------------------------------ */
@@ -86,10 +87,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
   cTValue *mo = lj_meta_lookup(L, o, mm);
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
     L->top = o+1;  /* Only keep one argument. */
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
   } else {
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+    if (LJ_FR2) { copyTV(L, o-1, o); o--; }
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     return FFH_RES(3);
@@ -100,7 +102,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
 #endif
 
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(pairs)
+LJLIB_ASM(pairs)		LJLIB_REC(xpairs 0)
 {
   return ffh_pairs(L, MM_pairs);
 }
@@ -113,7 +115,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux)	LJLIB_REC(.)
 }
 
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(ipairs)		LJLIB_REC(.)
+LJLIB_ASM(ipairs)		LJLIB_REC(xpairs 1)
 {
   return ffh_pairs(L, MM_ipairs);
 }
@@ -131,11 +133,11 @@ LJLIB_ASM(setmetatable)		LJLIB_REC(.)
     lj_err_caller(L, LJ_ERR_PROTMT);
   setgcref(t->metatable, obj2gco(mt));
   if (mt) { lj_gc_objbarriert(L, t, mt); }
-  settabV(L, L->base-1, t);
+  settabV(L, L->base-1-LJ_FR2, t);
   return FFH_RES(1);
 }
 
-LJLIB_CF(getfenv)
+LJLIB_CF(getfenv)		LJLIB_REC(.)
 {
   GCfunc *fn;
   cTValue *o = L->base;
@@ -144,6 +146,7 @@ LJLIB_CF(getfenv)
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   fn = &gcval(o)->fn;
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +168,7 @@ LJLIB_CF(setfenv)
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   fn = &gcval(o)->fn;
   if (!isluafunc(fn))
@@ -257,7 +261,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   if (base == 10) {
     TValue *o = lj_lib_checkany(L, 1);
     if (lj_strscan_numberobj(o)) {
-      copyTV(L, L->base-1, o);
+      copyTV(L, L->base-1-LJ_FR2, o);
       return FFH_RES(1);
     }
 #if LJ_HASFFI
@@ -270,11 +274,11 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	  int32_t i;
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
-	  setintV(L->base-1, i);
+	  setintV(L->base-1-LJ_FR2, i);
 	  return FFH_RES(1);
 	}
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
-		       (uint8_t *)&(L->base-1)->n, o, 0);
+		       (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
 	return FFH_RES(1);
       }
     }
@@ -290,45 +294,29 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
       while (lj_char_isspace((unsigned char)(*ep))) ep++;
       if (*ep == '\0') {
 	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-	  setintV(L->base-1, (int32_t)ul);
+	  setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	else
-	  setnumV(L->base-1, (lua_Number)ul);
+	  setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
 	return FFH_RES(1);
       }
     }
   }
-  setnilV(L->base-1);
+  setnilV(L->base-1-LJ_FR2);
   return FFH_RES(1);
 }
 
-LJLIB_PUSH("nil")
-LJLIB_PUSH("false")
-LJLIB_PUSH("true")
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 {
   TValue *o = lj_lib_checkany(L, 1);
   cTValue *mo;
   L->top = o+1;  /* Only keep one argument. */
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
-  } else {
-    GCstr *s;
-    if (tvisnumber(o)) {
-      s = lj_str_fromnumber(L, o);
-    } else if (tvispri(o)) {
-      s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
-    } else {
-      if (tvisfunc(o) && isffunc(funcV(o)))
-	lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
-      else
-	lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
-      /* Note: lua_pushfstring calls the GC which may invalidate o. */
-      s = strV(L->top-1);
-    }
-    setstrV(L, L->base-1, s);
-    return FFH_RES(1);
   }
+  lj_gc_check(L);
+  setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
+  return FFH_RES(1);
 }
 
 /* -- Base library: throw and catch errors -------------------------------- */
@@ -440,20 +428,20 @@ LJLIB_CF(dofile)
 
 LJLIB_CF(gcinfo)
 {
-  setintV(L->top++, (G(L)->gc.total >> 10));
+  setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
   return 1;
 }
 
 LJLIB_CF(collectgarbage)
 {
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
-    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
   int32_t data = lj_lib_optint(L, 2, 0);
   if (opt == LUA_GCCOUNT) {
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
   } else {
     int res = lua_gc(L, opt, data);
-    if (opt == LUA_GCSTEP)
+    if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
       setboolV(L->top, res);
     else
       setintV(L->top, res);
@@ -506,21 +494,13 @@ LJLIB_CF(print)
   }
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   for (i = 0; i < nargs; i++) {
+    cTValue *o = &L->base[i];
+    char buf[STRFMT_MAXBUF_NUM];
     const char *str;
     size_t size;
-    cTValue *o = &L->base[i];
-    if (shortcut && tvisstr(o)) {
-      str = strVdata(o);
-      size = strV(o)->len;
-    } else if (shortcut && tvisint(o)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(o));
-      size = (size_t)(buf+LJ_STR_INTBUF-p);
-      str = p;
-    } else if (shortcut && tvisnum(o)) {
-      char buf[LJ_STR_NUMBUF];
-      size = lj_str_bufnum(buf, o);
-      str = buf;
+    MSize len;
+    if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
+      size = len;
     } else {
       copyTV(L, L->top+1, o);
       copyTV(L, L->top, L->top-1);
@@ -558,7 +538,7 @@ LJLIB_CF(coroutine_status)
   if (co == L) s = "running";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status != 0) s = "dead";
-  else if (co->base > tvref(co->stack)+1) s = "normal";
+  else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
   else if (co->top == co->base) s = "dead";
   else s = "suspended";
   lua_pushstring(L, s);
@@ -600,8 +580,8 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
       (co->status == 0 && co->top == co->base)) {
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     if (wrap) lj_err_caller(L, em);
-    setboolV(L->base-1, 0);
-    setstrV(L, L->base, lj_err_str(L, em));
+    setboolV(L->base-1-LJ_FR2, 0);
+    setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
   }
   lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -642,9 +622,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
 
 LJLIB_CF(coroutine_wrap)
 {
+  GCfunc *fn;
   lj_cf_coroutine_create(L);
-  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
-  setpc_wrap_aux(L, funcV(L->top-1));
+  fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, fn);
   return 1;
 }
 

+ 180 - 0
jni/LuaJIT-2.1/src/lib_bit.c

@@ -0,0 +1,180 @@
+/*
+** Bit manipulation library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_bit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_bit
+
+#if LJ_HASFFI
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
+{
+  GCcdata *cd = lj_cdata_new_(L, id, 8);
+  *(uint64_t *)cdataptr(cd) = x;
+  setcdataV(L, L->base-1-LJ_FR2, cd);
+  return FFH_RES(1);
+}
+#else
+static int32_t bit_checkbit(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;
+  if (!(o < L->top && lj_strscan_numberobj(o)))
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);
+    return i;
+  }
+}
+#endif
+
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
+  return FFH_RES(1);
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, ~x) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
+{
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+  if (id) {
+    x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
+    return bit_result64(L, id, x);
+  }
+  if (id2) setintV(L->base+1, sh);
+  return FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  bit_checkbit(L, 2);
+  return FFH_RETRY;
+#endif
+}
+LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
+LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
+LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
+LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
+
+LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  TValue *o = L->base, *top = L->top;
+  int i = 0;
+  do { lj_carith_check64(L, ++i, &id); } while (++o < top);
+  if (id) {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_get(cts, id);
+    int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
+    uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
+    o = L->base;
+    do {
+      lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
+      if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+    } while (++o < top);
+    return bit_result64(L, id, y);
+  }
+  return FFH_RETRY;
+#else
+  int i = 0;
+  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
+  return FFH_RETRY;
+#endif
+}
+LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
+LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
+
+/* ------------------------------------------------------------------------ */
+
+LJLIB_CF(bit_tohex)		LJLIB_REC(.)
+{
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t b = lj_carith_check64(L, 1, &id);
+  int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
+				  (int32_t)lj_carith_check64(L, 2, &id2);
+#else
+  uint32_t b = (uint32_t)bit_checkbit(L, 1);
+  int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
+#endif
+  SBuf *sb = lj_buf_tmp_(L);
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+#if LJ_HASFFI
+  if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
+#else
+  if (n < 8) b &= (1u << 4*n)-1;
+#endif
+  sb = lj_strfmt_putfxint(sb, sf, b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_bit(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
+  return 1;
+}
+

+ 2 - 2
jni/LuaJIT-2.0.1/src/lib_debug.c → jni/LuaJIT-2.1/src/lib_debug.c

@@ -1,6 +1,6 @@
 /*
 ** Debug library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
   return 1;
 }
 
-LJLIB_CF(debug_getmetatable)
+LJLIB_CF(debug_getmetatable)	LJLIB_REC(.)
 {
   lj_lib_checkany(L, 1);
   if (!lua_getmetatable(L, 1)) {

+ 37 - 15
jni/LuaJIT-2.0.1/src/lib_ffi.c → jni/LuaJIT-2.1/src/lib_ffi.c

@@ -1,6 +1,6 @@
 /*
 ** FFI library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_ffi_c
@@ -29,6 +29,7 @@
 #include "lj_ccall.h"
 #include "lj_ccallback.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 
@@ -136,7 +137,8 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
 	return 0;
       }
     }
-    tv = L->top-1;
+    copyTV(L, base, L->top);
+    tv = L->top-1-LJ_FR2;
   }
   return lj_meta_tailcall(L, tv);
 }
@@ -317,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
       }
     }
   }
-  lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+  lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
 checkgc:
   lj_gc_check(L);
   return 1;
@@ -506,7 +508,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
   if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
     cd = lj_cdata_new(cts, id, sz);
   else
-    cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+    cd = lj_cdata_newv(L, id, sz, ctype_align(info));
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
@@ -557,6 +559,31 @@ LJLIB_CF(ffi_typeof)	LJLIB_REC(.)
   return 1;
 }
 
+/* Internal and unsupported API. */
+LJLIB_CF(ffi_typeinfo)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = (CTypeID)ffi_checkint(L, 1);
+  if (id > 0 && id < cts->top) {
+    CType *ct = ctype_get(cts, id);
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
+    if (ct->size != CTSIZE_INVALID)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
+    if (ct->sib)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+    if (gcref(ct->name)) {
+      GCstr *s = gco2str(gcref(ct->name));
+      setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+    }
+    lj_gc_check(L);
+    return 1;
+  }
+  return 0;
+}
+
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 {
   CTState *cts = ctype_cts(L);
@@ -576,7 +603,7 @@ LJLIB_CF(ffi_istype)	LJLIB_REC(.)
       if (ctype_ispointer(ct1->info))
 	b = lj_cconv_compatptr(cts, ct1, ct2, CCF_IGNQUAL);
       else if (ctype_isnum(ct1->info) || ctype_isvoid(ct1->info))
-	b = (((ct1->info ^ ct2->info) & ~CTF_QUAL) == 0);
+	b = (((ct1->info ^ ct2->info) & ~(CTF_QUAL|CTF_LONG)) == 0);
     } else if (ctype_isstruct(ct1->info) && ctype_isptr(ct2->info) &&
 	       ct1 == ctype_rawchild(cts, ct2)) {
       b = 1;
@@ -657,7 +684,7 @@ LJLIB_CF(ffi_string)	LJLIB_REC(.)
   TValue *o = lj_lib_checkany(L, 1);
   const char *p;
   size_t len;
-  if (o+1 < L->top) {
+  if (o+1 < L->top && !tvisnil(o+1)) {
     len = (size_t)ffi_checkint(L, 2);
     lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, o,
 		   CCF_ARG(1));
@@ -724,6 +751,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
 #endif
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
+#if LJ_GC64
+  case H_(9e89d2c9,13c83c92): b = 1; break;  /* gc64 */
+#endif
   default:
     break;
   }
@@ -767,19 +797,11 @@ LJLIB_CF(ffi_gc)	LJLIB_REC(.)
   GCcdata *cd = ffi_checkcdata(L, 1);
   TValue *fin = lj_lib_checkany(L, 2);
   CTState *cts = ctype_cts(L);
-  GCtab *t = cts->finalizer;
   CType *ct = ctype_raw(cts, cd->ctypeid);
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
 	ctype_isrefarray(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  if (gcref(t->metatable)) {  /* Update finalizer table, if still enabled. */
-    copyTV(L, lj_tab_set(L, t, L->base), fin);
-    lj_gc_anybarriert(L, t);
-    if (!tvisnil(fin))
-      cd->marked |= LJ_GC_CDATA_FIN;
-    else
-      cd->marked &= ~LJ_GC_CDATA_FIN;
-  }
+  lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
   L->top = L->base+1;  /* Pass through the cdata object. */
   return 1;
 }

+ 1 - 1
jni/LuaJIT-2.0.1/src/lib_init.c → jni/LuaJIT-2.1/src/lib_init.c

@@ -1,6 +1,6 @@
 /*
 ** Library initialization.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major parts taken verbatim from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

+ 30 - 24
jni/LuaJIT-2.0.1/src/lib_io.c → jni/LuaJIT-2.1/src/lib_io.c

@@ -1,6 +1,6 @@
 /*
 ** I/O library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -17,9 +17,12 @@
 #include "lualib.h"
 
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 
@@ -83,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
   IOFileUD *iof = io_file_new(L);
   iof->fp = fopen(fname, mode);
   if (iof->fp == NULL)
-    luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+    luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
   return iof;
 }
 
@@ -96,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
     int stat = -1;
 #if LJ_TARGET_POSIX
     stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
     stat = _pclose(iof->fp);
 #else
     lua_assert(0);
@@ -144,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   char *buf;
   for (;;) {
-    buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    buf = lj_buf_tmp(L, m);
     if (fgets(buf+n, m-n, fp) == NULL) break;
     n += (MSize)strlen(buf+n);
     ok |= n;
@@ -152,6 +155,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
     if (n >= m - 64) m += m;
   }
   setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+  lj_gc_check(L);
   return (int)ok;
 }
 
@@ -159,10 +163,11 @@ static void io_file_readall(lua_State *L, FILE *fp)
 {
   MSize m, n;
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     if (n != m) {
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+      lj_gc_check(L);
       return;
     }
   }
@@ -171,9 +176,10 @@ static void io_file_readall(lua_State *L, FILE *fp)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 {
   if (m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+    lj_gc_check(L);
     return (n > 0 || m == 0);
   } else {
     int c = getc(fp);
@@ -226,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
   cTValue *tv;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
-    if (tvisstr(tv)) {
-      MSize len = strV(tv)->len;
-      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
-    } else if (tvisint(tv)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(tv));
-      size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
-      status = status && (fwrite(p, 1, len, fp) == len);
-    } else if (tvisnum(tv)) {
-      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
-    } else {
+    char buf[STRFMT_MAXBUF_NUM];
+    MSize len;
+    const char *p = lj_strfmt_wstrnum(buf, tv, &len);
+    if (!p)
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
-    }
+    status = status && (fwrite(p, 1, len, fp) == len);
   }
   if (LJ_52 && status) {
     L->top = L->base+1;
@@ -274,6 +273,15 @@ static int io_file_iter(lua_State *L)
   return n;
 }
 
+static int io_file_lines(lua_State *L)
+{
+  int n = (int)(L->top - L->base);
+  if (n > LJ_MAX_UPVAL)
+    lj_err_caller(L, LJ_ERR_UNPACK);
+  lua_pushcclosure(L, io_file_iter, n);
+  return 1;
+}
+
 /* -- I/O file methods ---------------------------------------------------- */
 
 #define LJLIB_MODULE_io_method
@@ -357,8 +365,7 @@ LJLIB_CF(io_method_setvbuf)
 LJLIB_CF(io_method_lines)
 {
   io_tofile(L);
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 
 LJLIB_CF(io_method___gc)
@@ -401,7 +408,7 @@ LJLIB_CF(io_open)
 
 LJLIB_CF(io_popen)
 {
-#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   GCstr *s = lj_lib_optstr(L, 2);
   const char *mode = s ? strdata(s) : "r";
@@ -422,7 +429,7 @@ LJLIB_CF(io_popen)
 LJLIB_CF(io_tmpfile)
 {
   IOFileUD *iof = io_file_new(L);
-#if LJ_TARGET_PS3
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
   iof->fp = NULL; errno = ENOSYS;
 #else
   iof->fp = tmpfile();
@@ -488,8 +495,7 @@ LJLIB_CF(io_lines)
   } else {  /* io.lines() iterates over stdin. */
     setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
   }
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 
 LJLIB_CF(io_type)

+ 131 - 27
jni/LuaJIT-2.0.1/src/lib_jit.c → jni/LuaJIT-2.1/src/lib_jit.c

@@ -1,6 +1,6 @@
 /*
 ** JIT library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_jit_c
@@ -10,13 +10,17 @@
 #include "lauxlib.h"
 #include "lualib.h"
 
-#include "lj_arch.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_str.h"
 #include "lj_tab.h"
+#include "lj_state.h"
 #include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #if LJ_HASJIT
 #include "lj_ir.h"
 #include "lj_jit.h"
@@ -24,6 +28,7 @@
 #include "lj_iropt.h"
 #include "lj_target.h"
 #endif
+#include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_vmevent.h"
@@ -73,7 +78,7 @@ LJLIB_CF(jit_off)
 LJLIB_CF(jit_flush)
 {
 #if LJ_HASJIT
-  if (L->base < L->top && !tvisnil(L->base)) {
+  if (L->base < L->top && tvisnumber(L->base)) {
     int traceno = lj_lib_checkint(L, 1);
     luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
     return 0;
@@ -279,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
 /* Names of link types. ORDER LJ_TRLINK */
 static const char *const jit_trlinkname[] = {
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
-  "interpreter", "return"
+  "interpreter", "return", "stitch"
 };
 
 /* local info = jit.util.traceinfo(tr) */
@@ -332,6 +337,13 @@ LJLIB_CF(jit_util_tracek)
       slot = ir->op2;
       ir = &T->ir[ir->op1];
     }
+#if LJ_HASFFI
+    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);
+    }
+#endif
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     if (slot == -1)
@@ -416,6 +428,12 @@ LJLIB_CF(jit_util_ircalladdr)
 
 #include "lj_libdef.h"
 
+static int luaopen_jit_util(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_util);
+  return 1;
+}
+
 /* -- jit.opt module ------------------------------------------------------ */
 
 #if LJ_HASJIT
@@ -513,6 +531,104 @@ LJLIB_CF(jit_opt_start)
 
 #endif
 
+/* -- jit.profile module -------------------------------------------------- */
+
+#if LJ_HASPROFILE
+
+#define LJLIB_MODULE_jit_profile
+
+/* Not loaded by default, use: local profile = require("jit.profile") */
+
+static const char KEY_PROFILE_THREAD = 't';
+static const char KEY_PROFILE_FUNC = 'f';
+
+static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+				 int vmstate)
+{
+  TValue key;
+  cTValue *tv;
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  tv = lj_tab_get(L, tabV(registry(L)), &key);
+  if (tvisfunc(tv)) {
+    char vmst = (char)vmstate;
+    int status;
+    setfuncV(L2, L2->top++, funcV(tv));
+    setthreadV(L2, L2->top++, L);
+    setintV(L2->top++, samples);
+    setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
+    status = lua_pcall(L2, 3, 0, 0);  /* callback(thread, samples, vmstate) */
+    if (status) {
+      if (G(L2)->panic) G(L2)->panic(L2);
+      exit(EXIT_FAILURE);
+    }
+    lj_trace_abort(G(L2));
+  }
+}
+
+/* profile.start(mode, cb) */
+LJLIB_CF(jit_profile_start)
+{
+  GCtab *registry = tabV(registry(L));
+  GCstr *mode = lj_lib_optstr(L, 1);
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  lua_State *L2 = lua_newthread(L);  /* Thread that runs profiler callback. */
+  TValue key;
+  /* Anchor thread and function in registry. */
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setthreadV(L, lj_tab_set(L, registry, &key), L2);
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setfuncV(L, lj_tab_set(L, registry, &key), func);
+  lj_gc_anybarriert(L, registry);
+  luaJIT_profile_start(L, mode ? strdata(mode) : "",
+		       (luaJIT_profile_callback)jit_profile_callback, L2);
+  return 0;
+}
+
+/* profile.stop() */
+LJLIB_CF(jit_profile_stop)
+{
+  GCtab *registry;
+  TValue key;
+  luaJIT_profile_stop(L);
+  registry = tabV(registry(L));
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setnilV(lj_tab_set(L, registry, &key));
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setnilV(lj_tab_set(L, registry, &key));
+  lj_gc_anybarriert(L, registry);
+  return 0;
+}
+
+/* dump = profile.dumpstack([thread,] fmt, depth) */
+LJLIB_CF(jit_profile_dumpstack)
+{
+  lua_State *L2 = L;
+  int arg = 0;
+  size_t len;
+  int depth;
+  GCstr *fmt;
+  const char *p;
+  if (L->top > L->base && tvisthread(L->base)) {
+    L2 = threadV(L->base);
+    arg = 1;
+  }
+  fmt = lj_lib_checkstr(L, arg+1);
+  depth = lj_lib_checkint(L, arg+2);
+  p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
+  lua_pushlstring(L, p, len);
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+static int luaopen_jit_profile(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_profile);
+  return 1;
+}
+
+#endif
+
 /* -- JIT compiler initialization ----------------------------------------- */
 
 #if LJ_HASJIT
@@ -538,23 +654,17 @@ static uint32_t jit_cpudetect(lua_State *L)
   uint32_t features[4];
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
 #if !LJ_HASJIT
-#define JIT_F_CMOV	1
 #define JIT_F_SSE2	2
 #endif
-    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
 #if LJ_HASJIT
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
-      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
-	flags |= JIT_F_P4;  /* Currently unused. */
-      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+      if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
 	flags |= JIT_F_LEA_AGU;
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
       uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam == 0x00000f00)  /* K8. */
-	flags |= JIT_F_SPLIT_XMM;
       if (fam >= 0x00000f00)  /* K8, K10. */
 	flags |= JIT_F_PREFER_IMUL;
     }
@@ -562,14 +672,8 @@ static uint32_t jit_cpudetect(lua_State *L)
   }
   /* Check for required instruction set support on x86 (unnecessary on x64). */
 #if LJ_TARGET_X86
-#if !defined(LUAJIT_CPU_NOCMOV)
-  if (!(flags & JIT_F_CMOV))
-    luaL_error(L, "CPU not supported");
-#endif
-#if defined(LUAJIT_CPU_SSE2)
   if (!(flags & JIT_F_SSE2))
-    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-#endif
+    luaL_error(L, "CPU with SSE2 required");
 #endif
 #elif LJ_TARGET_ARM
 #if LJ_HASJIT
@@ -591,6 +695,8 @@ static uint32_t jit_cpudetect(lua_State *L)
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
 #endif
+#elif LJ_TARGET_ARM64
+  /* No optional CPU features to detect (for now). */
 #elif LJ_TARGET_PPC
 #if LJ_HASJIT
 #if LJ_ARCH_SQRT
@@ -600,8 +706,6 @@ static uint32_t jit_cpudetect(lua_State *L)
   flags |= JIT_F_ROUND;
 #endif
 #endif
-#elif LJ_TARGET_PPCSPE
-  /* Nothing to do. */
 #elif LJ_TARGET_MIPS
 #if LJ_HASJIT
   /* Compile-time MIPS CPU detection. */
@@ -631,11 +735,7 @@ static void jit_init(lua_State *L)
   uint32_t flags = jit_cpudetect(L);
 #if LJ_HASJIT
   jit_State *J = L2J(L);
-#if LJ_TARGET_X86
-  /* Silently turn off the JIT compiler on CPUs without SSE2. */
-  if ((flags & JIT_F_SSE2))
-#endif
-    J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
   memcpy(J->param, jit_param_default, sizeof(J->param));
   lj_dispatch_update(G(L));
 #else
@@ -645,19 +745,23 @@ static void jit_init(lua_State *L)
 
 LUALIB_API int luaopen_jit(lua_State *L)
 {
+  jit_init(L);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushliteral(L, LUAJIT_VERSION);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#if LJ_HASPROFILE
+  lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
+		tabref(L->env));
+#endif
 #ifndef LUAJIT_DISABLE_JITUTIL
-  LJ_LIB_REG(L, "jit.util", jit_util);
+  lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
 #endif
 #if LJ_HASJIT
   LJ_LIB_REG(L, "jit.opt", jit_opt);
 #endif
   L->top -= 2;
-  jit_init(L);
   return 1;
 }
 

+ 4 - 7
jni/LuaJIT-2.0.1/src/lib_math.c → jni/LuaJIT-2.1/src/lib_math.c

@@ -1,6 +1,6 @@
 /*
 ** Math library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <math.h>
@@ -57,17 +57,14 @@ LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 #else
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
 #endif
-    setnumV(L->base-1, x*y);  /* Do NOT join the expression to x / y. */
+    setnumV(L->base-1-LJ_FR2, x*y);  /* Do NOT join the expression to x / y. */
     return FFH_RES(1);
   }
   return FFH_RETRY;
 }
 
-LJLIB_PUSH(57.29577951308232)
-LJLIB_ASM_(math_deg)		LJLIB_REC(math_degrad)
-
-LJLIB_PUSH(0.017453292519943295)
-LJLIB_ASM_(math_rad)		LJLIB_REC(math_degrad)
+LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
+LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
 
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 {

+ 32 - 20
jni/LuaJIT-2.0.1/src/lib_os.c → jni/LuaJIT-2.1/src/lib_os.c

@@ -1,13 +1,12 @@
 /*
 ** OS library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 
 #include <errno.h>
-#include <locale.h>
 #include <time.h>
 
 #define lib_os_c
@@ -18,7 +17,10 @@
 #include "lualib.h"
 
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
 #include "lj_lib.h"
 
 #if LJ_TARGET_POSIX
@@ -27,13 +29,17 @@
 #include <stdio.h>
 #endif
 
+#if !LJ_TARGET_PSVITA
+#include <locale.h>
+#endif
+
 /* ------------------------------------------------------------------------ */
 
 #define LJLIB_MODULE_os
 
 LJLIB_CF(os_execute)
 {
-#if LJ_TARGET_CONSOLE
+#if LJ_NO_SYSTEM
 #if LJ_52
   errno = ENOSYS;
   return luaL_fileresult(L, 0, NULL);
@@ -70,7 +76,7 @@ LJLIB_CF(os_rename)
 
 LJLIB_CF(os_tmpname)
 {
-#if LJ_TARGET_PS3
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
   lj_err_caller(L, LJ_ERR_OSUNIQF);
   return 0;
 #else
@@ -185,7 +191,7 @@ LJLIB_CF(os_date)
 #endif
   }
   if (stm == NULL) {  /* Invalid date? */
-    setnilV(L->top-1);
+    setnilV(L->top++);
   } else if (strcmp(s, "*t") == 0) {
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     setfield(L, "sec", stm->tm_sec);
@@ -197,23 +203,25 @@ LJLIB_CF(os_date)
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setboolfield(L, "isdst", stm->tm_isdst);
-  } else {
-    char cc[3];
-    luaL_Buffer b;
-    cc[0] = '%'; cc[2] = '\0';
-    luaL_buffinit(L, &b);
-    for (; *s; s++) {
-      if (*s != '%' || *(s + 1) == '\0') {  /* No conversion specifier? */
-	luaL_addchar(&b, *s);
-      } else {
-	size_t reslen;
-	char buff[200];  /* Should be big enough for any conversion result. */
-	cc[1] = *(++s);
-	reslen = strftime(buff, sizeof(buff), cc, stm);
-	luaL_addlstring(&b, buff, reslen);
+  } else if (*s) {
+    SBuf *sb = &G(L)->tmpbuf;
+    MSize sz = 0;
+    const char *q;
+    for (q = s; *q; q++)
+      sz += (*q == '%') ? 30 : 1;  /* Overflow doesn't matter. */
+    setsbufL(sb, L);
+    for (;;) {
+      char *buf = lj_buf_need(sb, sz);
+      size_t len = strftime(buf, sbufsz(sb), s, stm);
+      if (len) {
+	setstrV(L, L->top++, lj_str_new(L, buf, len));
+	lj_gc_check(L);
+	break;
       }
+      sz += (sz|1);
     }
-    luaL_pushresult(&b);
+  } else {
+    setstrV(L, L->top++, &G(L)->strempty);
   }
   return 1;
 }
@@ -254,6 +262,9 @@ LJLIB_CF(os_difftime)
 
 LJLIB_CF(os_setlocale)
 {
+#if LJ_TARGET_PSVITA
+  lua_pushliteral(L, "C");
+#else
   GCstr *s = lj_lib_optstr(L, 1);
   const char *str = s ? strdata(s) : NULL;
   int opt = lj_lib_checkopt(L, 2, 6,
@@ -265,6 +276,7 @@ LJLIB_CF(os_setlocale)
   else if (opt == 4) opt = LC_MONETARY;
   else if (opt == 6) opt = LC_ALL;
   lua_pushstring(L, setlocale(opt, str));
+#endif
   return 1;
 }
 

+ 12 - 7
jni/LuaJIT-2.0.1/src/lib_package.c → jni/LuaJIT-2.1/src/lib_package.c

@@ -1,6 +1,6 @@
 /*
 ** Package library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -68,9 +68,6 @@ static const char *ll_bcsym(void *lib, const char *sym)
 #elif LJ_TARGET_WINDOWS
 
 #define WIN32_LEAN_AND_MEAN
-#ifndef WINVER
-#define WINVER 0x0500
-#endif
 #include <windows.h>
 
 #ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
@@ -99,9 +96,17 @@ static void setprogdir(lua_State *L)
 static void pusherror(lua_State *L)
 {
   DWORD error = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuffer[128];
+  char buffer[128*2];
+  if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
+      WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
+#else
   char buffer[128];
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
       NULL, error, 0, buffer, sizeof(buffer), NULL))
+#endif
     lua_pushstring(L, buffer);
   else
     lua_pushfstring(L, "system error %d\n", error);
@@ -114,7 +119,7 @@ static void ll_unloadlib(void *lib)
 
 static void *ll_load(lua_State *L, const char *path, int gl)
 {
-  HINSTANCE lib = LoadLibraryA(path);
+  HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
   if (lib == NULL) pusherror(L);
   UNUSED(gl);
   return lib;
@@ -229,7 +234,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       lua_pop(L, 1);
       if (bcdata) {
-	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+	if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
 	  return PACKAGE_ERR_LOAD;
 	return 0;
       }
@@ -386,7 +391,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
   if (lua_isnil(L, -1)) {  /* Not found? */
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcdata = ll_bcsym(NULL, bcname);
-    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
   }
   return 1;

+ 131 - 319
jni/LuaJIT-2.0.1/src/lib_string.c → jni/LuaJIT-2.1/src/lib_string.c

@@ -1,13 +1,11 @@
 /*
 ** String library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 
-#include <stdio.h>
-
 #define lib_string_c
 #define LUA_LIB
 
@@ -18,6 +16,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
@@ -25,17 +24,19 @@
 #include "lj_ff.h"
 #include "lj_bcdump.h"
 #include "lj_char.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 
 /* ------------------------------------------------------------------------ */
 
 #define LJLIB_MODULE_string
 
-LJLIB_ASM(string_len)		LJLIB_REC(.)
-{
-  lj_lib_checkstr(L, 1);
-  return FFH_RETRY;
-}
+LJLIB_LUA(string_len) /*
+  function(s)
+    CHECK_str(s)
+    return #s
+  end
+*/
 
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 {
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
   lj_state_checkstack(L, (MSize)n);
   p = (const unsigned char *)strdata(s) + start;
   for (i = 0; i < n; i++)
-    setintV(L->base + i-1, p[i]);
+    setintV(L->base + i-1-LJ_FR2, p[i]);
   return FFH_RES(n);
 }
 
-LJLIB_ASM(string_char)
+LJLIB_ASM(string_char)		LJLIB_REC(.)
 {
   int i, nargs = (int)(L->top - L->base);
-  char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs);
+  char *buf = lj_buf_tmp(L, (MSize)nargs);
   for (i = 1; i <= nargs; i++) {
     int32_t k = lj_lib_checkint(L, i);
     if (!checku8(k))
       lj_err_arg(L, i, LJ_ERR_BADVAL);
     buf[i-1] = (char)k;
   }
-  setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+  setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
   return FFH_RES(1);
 }
 
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub)		LJLIB_REC(string_range 1)
   return FFH_RETRY;
 }
 
-LJLIB_ASM(string_rep)
+LJLIB_CF(string_rep)		LJLIB_REC(.)
 {
   GCstr *s = lj_lib_checkstr(L, 1);
-  int32_t k = lj_lib_checkint(L, 2);
+  int32_t rep = lj_lib_checkint(L, 2);
   GCstr *sep = lj_lib_optstr(L, 3);
-  int32_t len = (int32_t)s->len;
-  global_State *g = G(L);
-  int64_t tlen;
-  const char *src;
-  char *buf;
-  if (k <= 0) {
-  empty:
-    setstrV(L, L->base-1, &g->strempty);
-    return FFH_RES(1);
+  SBuf *sb = lj_buf_tmp_(L);
+  if (sep && rep > 1) {
+    GCstr *s2 = lj_buf_cat2str(L, sep, s);
+    lj_buf_reset(sb);
+    lj_buf_putstr(sb, s);
+    s = s2;
+    rep--;
   }
-  if (sep) {
-    tlen = (int64_t)len + sep->len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-    tlen *= k;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  } else {
-    tlen = (int64_t)k * len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  }
-  if (tlen == 0) goto empty;
-  buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
-  src = strdata(s);
-  if (sep) {
-    tlen -= sep->len;  /* Ignore trailing separator. */
-    if (k > 1) {  /* Paste one string and one separator. */
-      int32_t i;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = strdata(sep); len = sep->len;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = g->tmpbuf.buf; len += s->len; k--;  /* Now copy that k-1 times. */
-    }
-  }
-  do {
-    int32_t i = 0;
-    do { *buf++ = src[i++]; } while (i < len);
-  } while (--k > 0);
-  setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
-  return FFH_RES(1);
+  sb = lj_buf_putstr_rep(sb, s, rep);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
 }
 
-LJLIB_ASM(string_reverse)
+LJLIB_ASM(string_reverse)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
 {
-  GCstr *s = lj_lib_checkstr(L, 1);
-  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+  lj_lib_checkstr(L, 1);
   return FFH_RETRY;
 }
-LJLIB_ASM_(string_lower)
-LJLIB_ASM_(string_upper)
+LJLIB_ASM_(string_lower)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
+LJLIB_ASM_(string_upper)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
 
 /* ------------------------------------------------------------------------ */
 
-static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
 {
-  luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+  lj_buf_putmem((SBuf *)sb, p, (MSize)size);
   UNUSED(L);
   return 0;
 }
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
 {
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
-  luaL_Buffer b;
+  SBuf *sb = lj_buf_tmp_(L);  /* Assumes lj_bcwrite() doesn't use tmpbuf. */
   L->top = L->base+1;
-  luaL_buffinit(L, &b);
-  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
     lj_err_caller(L, LJ_ERR_STRDUMP);
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
 }
 
@@ -183,7 +154,6 @@ typedef struct MatchState {
 } MatchState;
 
 #define L_ESC		'%'
-#define SPECIALS	"^$*+?.([%-"
 
 static int check_capture(MatchState *ms, int l)
 {
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
   return s;
 }
 
-static const char *lmemfind(const char *s1, size_t l1,
-			    const char *s2, size_t l2)
-{
-  if (l2 == 0) {
-    return s1;  /* empty strings are everywhere */
-  } else if (l2 > l1) {
-    return NULL;  /* avoids a negative `l1' */
-  } else {
-    const char *init;  /* to search for a `*s2' inside `s1' */
-    l2--;  /* 1st char will be checked by `memchr' */
-    l1 = l1-l2;  /* `s2' cannot be found after that */
-    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
-      init++;   /* 1st char is already checked */
-      if (memcmp(init, s2+1, l2) == 0) {
-	return init-1;
-      } else {  /* correct `l1' and `s1' to try again */
-	l1 -= (size_t)(init-s1);
-	s1 = init;
-      }
-    }
-    return NULL;  /* not found */
-  }
-}
-
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 {
   if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
   return nlevels;  /* number of strings pushed */
 }
 
-static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
-{
-  /* relative string position: negative means back from end */
-  if (pos < 0) pos += (ptrdiff_t)len + 1;
-  return (pos >= 0) ? pos : 0;
-}
-
 static int str_find_aux(lua_State *L, int find)
 {
-  size_t l1, l2;
-  const char *s = luaL_checklstring(L, 1, &l1);
-  const char *p = luaL_checklstring(L, 2, &l2);
-  ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
-  if (init < 0) {
-    init = 0;
-  } else if ((size_t)(init) > l1) {
+  GCstr *s = lj_lib_checkstr(L, 1);
+  GCstr *p = lj_lib_checkstr(L, 2);
+  int32_t start = lj_lib_optint(L, 3, 1);
+  MSize st;
+  if (start < 0) start += (int32_t)s->len; else start--;
+  if (start < 0) start = 0;
+  st = (MSize)start;
+  if (st > s->len) {
 #if LJ_52
     setnilV(L->top-1);
     return 1;
 #else
-    init = (ptrdiff_t)l1;
+    st = s->len;
 #endif
   }
-  if (find && (lua_toboolean(L, 4) ||  /* explicit request? */
-      strpbrk(p, SPECIALS) == NULL)) {  /* or no special characters? */
-    /* do a plain search */
-    const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
-    if (s2) {
-      lua_pushinteger(L, s2-s+1);
-      lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
+	       !lj_str_haspattern(p))) {  /* Search for fixed string. */
+    const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
+    if (q) {
+      setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
+      setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
       return 2;
     }
-  } else {
+  } else {  /* Search for pattern. */
     MatchState ms;
-    int anchor = (*p == '^') ? (p++, 1) : 0;
-    const char *s1=s+init;
+    const char *pstr = strdata(p);
+    const char *sstr = strdata(s) + st;
+    int anchor = 0;
+    if (*pstr == '^') { pstr++; anchor = 1; }
     ms.L = L;
-    ms.src_init = s;
-    ms.src_end = s+l1;
-    do {
-      const char *res;
+    ms.src_init = strdata(s);
+    ms.src_end = strdata(s) + s->len;
+    do {  /* Loop through string and try to match the pattern. */
+      const char *q;
       ms.level = ms.depth = 0;
-      if ((res=match(&ms, s1, p)) != NULL) {
+      q = match(&ms, sstr, pstr);
+      if (q) {
 	if (find) {
-	  lua_pushinteger(L, s1-s+1);  /* start */
-	  lua_pushinteger(L, res-s);   /* end */
-	  return push_captures(&ms, NULL, 0) + 2;
+	  setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
+	  setintV(L->top++, (int32_t)(q-strdata(s)));
+	  return push_captures(&ms, NULL, NULL) + 2;
 	} else {
-	  return push_captures(&ms, s1, res);
+	  return push_captures(&ms, sstr, q);
 	}
       }
-    } while (s1++ < ms.src_end && !anchor);
+    } while (sstr++ < ms.src_end && !anchor);
   }
-  lua_pushnil(L);  /* not found */
+  setnilV(L->top-1);  /* Not found. */
   return 1;
 }
 
-LJLIB_CF(string_find)
+LJLIB_CF(string_find)		LJLIB_REC(.)
 {
   return str_find_aux(L, 1);
 }
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
 
 /* ------------------------------------------------------------------------ */
 
-/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
-#define MAX_FMTITEM	512
-/* valid flags in a format specification */
-#define FMT_FLAGS	"-+ #0"
-/*
-** maximum size of each format specification (such as '%-099.99d')
-** (+10 accounts for %99.99x plus margin of error)
-*/
-#define MAX_FMTSPEC	(sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
-
-static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
-{
-  GCstr *str = lj_lib_checkstr(L, arg);
-  int32_t len = (int32_t)str->len;
-  const char *s = strdata(str);
-  luaL_addchar(b, '"');
-  while (len--) {
-    uint32_t c = uchar(*s);
-    if (c == '"' || c == '\\' || c == '\n') {
-      luaL_addchar(b, '\\');
-    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
-      uint32_t d;
-      luaL_addchar(b, '\\');
-      if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
-	luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
-	goto tens;
-      } else if (c >= 10) {
-      tens:
-	d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
-      }
-      c += '0';
-    }
-    luaL_addchar(b, c);
-    s++;
-  }
-  luaL_addchar(b, '"');
-}
-
-static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
-{
-  const char *p = strfrmt;
-  while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++;  /* skip flags */
-  if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
-    lj_err_caller(L, LJ_ERR_STRFMTR);
-  if (lj_char_isdigit(uchar(*p))) p++;  /* skip width */
-  if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  if (*p == '.') {
-    p++;
-    if (lj_char_isdigit(uchar(*p))) p++;  /* skip precision */
-    if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  }
-  if (lj_char_isdigit(uchar(*p)))
-    lj_err_caller(L, LJ_ERR_STRFMTW);
-  *(form++) = '%';
-  strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
-  form += p - strfrmt + 1;
-  *form = '\0';
-  return p;
-}
-
-static void addintlen(char *form)
-{
-  size_t l = strlen(form);
-  char spec = form[l - 1];
-  strcpy(form + l - 1, LUA_INTFRMLEN);
-  form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
-  form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
-}
-
-static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (LUA_INTFRM_T)intV(o);
-    else
-      return (LUA_INTFRM_T)numV(o);
-  }
-}
-
-static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (unsigned LUA_INTFRM_T)intV(o);
-    else if ((int32_t)o->u32.hi < 0)
-      return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
-    else
-      return (unsigned LUA_INTFRM_T)numV(o);
-  }
-}
-
-static GCstr *meta_tostring(lua_State *L, int arg)
+/* Emulate tostring() inline. */
+static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
 {
   TValue *o = L->base+arg-1;
   cTValue *mo;
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   if (LJ_LIKELY(tvisstr(o)))
     return strV(o);
-  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, o);
     lua_call(L, 1, 1);
-    L->top--;
-    if (tvisstr(L->top))
-      return strV(L->top);
-    o = L->base+arg-1;
-    copyTV(L, o, L->top);
-  }
-  if (tvisnumber(o)) {
-    return lj_str_fromnumber(L, o);
-  } else if (tvisnil(o)) {
-    return lj_str_newlit(L, "nil");
-  } else if (tvisfalse(o)) {
-    return lj_str_newlit(L, "false");
-  } else if (tvistrue(o)) {
-    return lj_str_newlit(L, "true");
-  } else {
-    if (tvisfunc(o) && isffunc(funcV(o)))
-      lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
-    else
-      lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
-    L->top--;
-    return strV(L->top);
+    copyTV(L, L->base+arg-1, --L->top);
+    return NULL;  /* Buffer may be overwritten, retry. */
   }
-}
-
-LJLIB_CF(string_format)
-{
-  int arg = 1, top = (int)(L->top - L->base);
-  GCstr *fmt = lj_lib_checkstr(L, arg);
-  const char *strfrmt = strdata(fmt);
-  const char *strfrmt_end = strfrmt + fmt->len;
-  luaL_Buffer b;
-  luaL_buffinit(L, &b);
-  while (strfrmt < strfrmt_end) {
-    if (*strfrmt != L_ESC) {
-      luaL_addchar(&b, *strfrmt++);
-    } else if (*++strfrmt == L_ESC) {
-      luaL_addchar(&b, *strfrmt++);  /* %% */
-    } else { /* format item */
-      char form[MAX_FMTSPEC];  /* to store the format (`%...') */
-      char buff[MAX_FMTITEM];  /* to store the formatted item */
+  return lj_strfmt_obj(L, o);
+}
+
+LJLIB_CF(string_format)		LJLIB_REC(.)
+{
+  int arg, top = (int)(L->top - L->base);
+  GCstr *fmt;
+  SBuf *sb;
+  FormatState fs;
+  SFormat sf;
+  int retry = 0;
+again:
+  arg = 1;
+  sb = lj_buf_tmp_(L);
+  fmt = lj_lib_checkstr(L, arg);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
+    } else {
       if (++arg > top)
 	luaL_argerror(L, arg, lj_obj_typename[0]);
-      strfrmt = scanformat(L, strfrmt, form);
-      switch (*strfrmt++) {
-      case 'c':
-	sprintf(buff, form, lj_lib_checkint(L, arg));
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(L->base+arg-1)) {
+	  int32_t k = intV(L->base+arg-1);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	} else {
+	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+	}
 	break;
-      case 'd':  case 'i':
-	addintlen(form);
-	sprintf(buff, form, num2intfrm(L, arg));
+      case STRFMT_UINT:
+	if (tvisint(L->base+arg-1))
+	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+	else
+	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
 	break;
-      case 'o':  case 'u':  case 'x':  case 'X':
-	addintlen(form);
-	sprintf(buff, form, num2uintfrm(L, arg));
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
 	break;
-      case 'e':  case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
-	TValue tv;
-	tv.n = lj_lib_checknum(L, arg);
-	if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
-	  /* Canonicalize output of non-finite values. */
-	  char *p, nbuf[LJ_STR_NUMBUF];
-	  size_t len = lj_str_bufnum(nbuf, &tv);
-	  if (strfrmt[-1] < 'a') {
-	    nbuf[len-3] = nbuf[len-3] - 0x20;
-	    nbuf[len-2] = nbuf[len-2] - 0x20;
-	    nbuf[len-1] = nbuf[len-1] - 0x20;
-	  }
-	  nbuf[len] = '\0';
-	  for (p = form; *p < 'A' && *p != '.'; p++) ;
-	  *p++ = 's'; *p = '\0';
-	  sprintf(buff, form, nbuf);
-	  break;
-	}
-	sprintf(buff, form, (double)tv.n);
+      case STRFMT_STR: {
+	GCstr *str = string_fmt_tostring(L, arg, retry);
+	if (str == NULL)
+	  retry = 1;
+	else if ((sf & STRFMT_T_QUOTED))
+	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
+	else
+	  lj_strfmt_putfstr(sb, sf, str);
 	break;
 	}
-      case 'q':
-	addquoted(L, &b, arg);
-	continue;
-      case 'p':
-	lj_str_pushf(L, "%p", lua_topointer(L, arg));
-	luaL_addvalue(&b);
-	continue;
-      case 's': {
-	GCstr *str = meta_tostring(L, arg);
-	if (!strchr(form, '.') && str->len >= 100) {
-	  /* no precision and string is too long to be formatted;
-	     keep original string */
-	  setstrV(L, L->top++, str);
-	  luaL_addvalue(&b);
-	  continue;
-	}
-	sprintf(buff, form, strdata(str));
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
 	break;
-	}
       default:
-	lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+	lua_assert(0);
 	break;
       }
-      luaL_addlstring(&b, buff, strlen(buff));
     }
   }
-  luaL_pushresult(&b);
+  if (retry++ == 1) goto again;
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
 }
 

+ 90 - 83
jni/LuaJIT-2.0.1/src/lib_table.c → jni/LuaJIT-2.1/src/lib_table.c

@@ -1,6 +1,6 @@
 /*
 ** Table library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -16,57 +16,43 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
+#include "lj_ff.h"
 #include "lj_lib.h"
 
 /* ------------------------------------------------------------------------ */
 
 #define LJLIB_MODULE_table
 
-LJLIB_CF(table_foreachi)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  MSize i, n = lj_tab_len(t);
-  for (i = 1; i <= n; i++) {
-    cTValue *val;
-    setfuncV(L, L->top, func);
-    setintV(L->top+1, i);
-    val = lj_tab_getint(t, (int32_t)i);
-    if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreachi) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for i=1,#t do
+      local r = f(i, t[i])
+      if r ~= nil then return r end
+    end
+  end
+*/
 
-LJLIB_CF(table_foreach)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  L->top = L->base+3;
-  setnilV(L->top-1);
-  while (lj_tab_next(L, t, L->top-1)) {
-    copyTV(L, L->top+2, L->top);
-    copyTV(L, L->top+1, L->top-1);
-    setfuncV(L, L->top, func);
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreach) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for k, v in PAIRS(t) do
+      local r = f(k, v)
+      if r ~= nil then return r end
+    end
+  end
+*/
 
-LJLIB_ASM(table_getn)		LJLIB_REC(.)
-{
-  lj_lib_checktab(L, 1);
-  return FFH_UNREACHABLE;
-}
+LJLIB_LUA(table_getn) /*
+  function(t)
+    CHECK_tab(t)
+    return #t
+  end
+*/
 
 LJLIB_CF(table_maxn)
 {
@@ -119,52 +105,47 @@ LJLIB_CF(table_insert)		LJLIB_REC(.)
   return 0;
 }
 
-LJLIB_CF(table_remove)		LJLIB_REC(.)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  int32_t e = (int32_t)lj_tab_len(t);
-  int32_t pos = lj_lib_optint(L, 2, e);
-  if (!(1 <= pos && pos <= e))  /* Nothing to remove? */
-    return 0;
-  lua_rawgeti(L, 1, pos);  /* Get previous value. */
-  /* NOBARRIER: This just moves existing elements around. */
-  for (; pos < e; pos++) {
-    cTValue *src = lj_tab_getint(t, pos+1);
-    TValue *dst = lj_tab_setint(L, t, pos);
-    if (src) {
-      copyTV(L, dst, src);
-    } else {
-      setnilV(dst);
-    }
-  }
-  setnilV(lj_tab_setint(L, t, e));  /* Remove (last) value. */
-  return 1;  /* Return previous value. */
-}
+LJLIB_LUA(table_remove) /*
+  function(t, pos)
+    CHECK_tab(t)
+    local len = #t
+    if pos == nil then
+      if len ~= 0 then
+	local old = t[len]
+	t[len] = nil
+	return old
+      end
+    else
+      CHECK_int(pos)
+      if pos >= 1 and pos <= len then
+	local old = t[pos]
+	for i=pos+1,len do
+	  t[i-1] = t[i]
+	end
+	t[len] = nil
+	return old
+      end
+    end
+  end
+*/
 
-LJLIB_CF(table_concat)
+LJLIB_CF(table_concat)		LJLIB_REC(.)
 {
-  luaL_Buffer b;
   GCtab *t = lj_lib_checktab(L, 1);
   GCstr *sep = lj_lib_optstr(L, 2);
-  MSize seplen = sep ? sep->len : 0;
   int32_t i = lj_lib_optint(L, 3, 1);
-  int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) :
-				   (int32_t)lj_tab_len(t);
-  luaL_buffinit(L, &b);
-  if (i <= e) {
-    for (;;) {
-      cTValue *o;
-      lua_rawgeti(L, 1, i);
-      o = L->top-1;
-      if (!(tvisstr(o) || tvisnumber(o)))
-	lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
-      luaL_addvalue(&b);
-      if (i++ == e) break;
-      if (seplen)
-	luaL_addlstring(&b, strdata(sep), seplen);
-    }
+  int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
+	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
+  SBuf *sb = lj_buf_tmp_(L);
+  SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+  if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
+    int32_t idx = (int32_t)(intptr_t)sbufP(sb);
+    cTValue *o = lj_tab_getint(t, idx);
+    lj_err_callerv(L, LJ_ERR_TABCAT,
+		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
   }
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sbx));
+  lj_gc_check(L);
   return 1;
 }
 
@@ -284,6 +265,30 @@ LJLIB_CF(table_pack)
 }
 #endif
 
+LJLIB_NOREG LJLIB_CF(table_new)		LJLIB_REC(.)
+{
+  int32_t a = lj_lib_checkint(L, 1);
+  int32_t h = lj_lib_checkint(L, 2);
+  lua_createtable(L, a, h);
+  return 1;
+}
+
+LJLIB_NOREG LJLIB_CF(table_clear)	LJLIB_REC(.)
+{
+  lj_tab_clear(lj_lib_checktab(L, 1));
+  return 0;
+}
+
+static int luaopen_table_new(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
+}
+
+static int luaopen_table_clear(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
+}
+
 /* ------------------------------------------------------------------------ */
 
 #include "lj_libdef.h"
@@ -295,6 +300,8 @@ LUALIB_API int luaopen_table(lua_State *L)
   lua_getglobal(L, "unpack");
   lua_setfield(L, -2, "unpack");
 #endif
+  lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+  lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
   return 1;
 }
 

+ 15 - 0
jni/LuaJIT-2.0.1/src/lj.supp → jni/LuaJIT-2.1/src/lj.supp

@@ -24,3 +24,18 @@
    Memcheck:Cond
    fun:lj_str_new
 }
+{
+   Optimized string compare
+   Memcheck:Addr4
+   fun:lj_str_fastcmp
+}
+{
+   Optimized string compare
+   Memcheck:Addr1
+   fun:lj_str_fastcmp
+}
+{
+   Optimized string compare
+   Memcheck:Cond
+   fun:lj_str_fastcmp
+}

+ 30 - 13
jni/LuaJIT-2.0.1/src/lj_alloc.c → jni/LuaJIT-2.1/src/lj_alloc.c

@@ -77,7 +77,7 @@
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
@@ -174,42 +174,55 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
 #endif
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+#if LJ_64 && !LJ_GC64
+/* 64 bit mode with 32 bit pointers needs special support for allocating
+** memory in the lower 2GB.
+*/
 
-#if LJ_TARGET_LINUX
+#if defined(MAP_32BIT)
 
+#if defined(__sun__)
+#define MMAP_REGION_START	((uintptr_t)0x1000)
+#else
 /* Actually this only gives us max. 1GB in current Linux kernels. */
+#define MMAP_REGION_START	((uintptr_t)0)
+#endif
+
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
   int olderr = errno;
-  void *ptr = mmap(NULL, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+  void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
   errno = olderr;
   return ptr;
 }
 
-#elif LJ_TARGET_OSX || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__OpenBSD__)
+#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__)
 
 /* OSX and FreeBSD mmap() use a naive first-fit linear search.
 ** That's perfect for us. Except that -pagezero_size must be set for OSX,
 ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
 ** to be reduced to 250MB on FreeBSD.
 */
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__OpenBSD__)
-#include <sys/resource.h>
-#define MMAP_REGION_START	((uintptr_t)0x10000000)
-#else
+#if LJ_TARGET_OSX || defined(__DragonFly__)
 #define MMAP_REGION_START	((uintptr_t)0x10000)
+#elif LJ_TARGET_PS4
+#define MMAP_REGION_START	((uintptr_t)0x4000)
+#else
+#define MMAP_REGION_START	((uintptr_t)0x10000000)
 #endif
 #define MMAP_REGION_END		((uintptr_t)0x80000000)
 
+#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+#include <sys/resource.h>
+#endif
+
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
   int olderr = errno;
   /* Hint for next allocation. Doesn't need to be thread-safe. */
   static uintptr_t alloc_hint = MMAP_REGION_START;
   int retry = 0;
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
   static int rlimit_modified = 0;
   if (LJ_UNLIKELY(rlimit_modified == 0)) {
     struct rlimit rlim;
@@ -227,6 +240,10 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
       return p;
     }
     if (p != CMFAIL) munmap(p, size);
+#if defined(__sun__) || defined(__DragonFly__)
+    alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
+    if (alloc_hint + size < MMAP_REGION_END) continue;
+#endif
     if (retry) break;
     retry = 1;
     alloc_hint = MMAP_REGION_START;
@@ -243,7 +260,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
 
 #else
 
-/* 32 bit mode is easy. */
+/* 32 bit mode and GC64 mode is easy. */
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
   int olderr = errno;
@@ -279,7 +296,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE

+ 0 - 0
jni/LuaJIT-2.0.1/src/lj_alloc.h → jni/LuaJIT-2.1/src/lj_alloc.h


+ 76 - 63
jni/LuaJIT-2.0.1/src/lj_api.c → jni/LuaJIT-2.1/src/lj_api.c

@@ -1,6 +1,6 @@
 /*
 ** Public Lua/C API.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -24,6 +24,7 @@
 #include "lj_trace.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 /* -- Common helper functions --------------------------------------------- */
 
@@ -188,7 +189,7 @@ LUA_API int lua_type(lua_State *L, int idx)
   cTValue *o = index2adr(L, idx);
   if (tvisnumber(o)) {
     return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o)) {
     return LUA_TLIGHTUSERDATA;
 #endif
@@ -268,7 +269,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     return 0;
   } else if (tvispri(o1)) {
     return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o1)) {
     return o1->u64 == o2->u64;
 #endif
@@ -283,8 +284,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     } else {
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
   }
 }
@@ -306,8 +307,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
     } else {
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
   }
 }
@@ -434,7 +435,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
   } else {
     if (len != NULL) *len = 0;
@@ -453,7 +454,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +476,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +508,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
   } else if (tvisudata(o)) {
     return udataV(o)->len;
   } else if (tvisnumber(o)) {
-    GCstr *s = lj_str_fromnumber(L, o);
+    GCstr *s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     return s->len;
   } else {
@@ -545,17 +546,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
 
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 {
-  cTValue *o = index2adr(L, idx);
-  if (tvisudata(o))
-    return uddata(udataV(o));
-  else if (tvislightud(o))
-    return lightudV(o);
-  else if (tviscdata(o))
-    return cdataptr(cdataV(o));
-  else if (tvisgcv(o))
-    return gcV(o);
-  else
-    return NULL;
+  return lj_obj_ptr(index2adr(L, idx));
 }
 
 /* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +597,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
 				     va_list argp)
 {
   lj_gc_check(L);
-  return lj_str_pushvf(L, fmt, argp);
+  return lj_strfmt_pushvf(L, fmt, argp);
 }
 
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +606,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
   va_list argp;
   lj_gc_check(L);
   va_start(argp, fmt);
-  ret = lj_str_pushvf(L, fmt, argp);
+  ret = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   return ret;
 }
@@ -649,10 +640,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 {
-  GCtab *t;
   lj_gc_check(L);
-  t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
-  settabV(L, L->top, t);
+  settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
   incr_top(L);
 }
 
@@ -715,8 +704,8 @@ LUA_API void lua_concat(lua_State *L, int n)
       n -= (int)(L->top - top);
       L->top = top+2;
       lj_vm_call(L, top, 1+1);
-      L->top--;
-      copyTV(L, L->top-1, L->top);
+      L->top -= 1+LJ_FR2;
+      copyTV(L, L->top-1, L->top+LJ_FR2);
     } while (--n > 0);
   } else if (n == 0) {  /* Push empty string. */
     setstrV(L, L->top, &G(L)->strempty);
@@ -735,8 +724,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
   if (v == NULL) {
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   copyTV(L, L->top-1, v);
 }
@@ -751,8 +740,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
   if (v == NULL) {
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   copyTV(L, L->top, v);
   incr_top(L);
@@ -893,13 +882,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
   o = lj_meta_tset(L, t, L->top-2);
   if (o) {
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top-1);
     L->top -= 2;
+    copyTV(L, o, L->top+1);
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 3;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 3+LJ_FR2;
   }
 }
 
@@ -913,14 +903,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
   setstrV(L, &key, lj_str_newz(L, k));
   o = lj_meta_tset(L, t, &key);
   if (o) {
-    L->top--;
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top);
+    copyTV(L, o, --L->top);
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 2;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 2+LJ_FR2;
   }
 }
 
@@ -1027,11 +1017,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
 
 /* -- Calls --------------------------------------------------------------- */
 
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)
+{
+  TValue *o = L->top, *base = o - nargs;
+  L->top = o+1;
+  for (; o > base; o--) copyTV(L, o, o-1);
+  setnilV(o);
+  return o+1;
+}
+#else
+#define api_call_base(L, nargs)	(L->top - (nargs))
+#endif
+
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 {
   api_check(L, L->status == 0 || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
-  lj_vm_call(L, L->top - nargs, nresults+1);
+  lj_vm_call(L, api_call_base(L, nargs), nresults+1);
 }
 
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1049,7 +1052,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
     api_checkvalidindex(L, o);
     ef = savestack(L, o);
   }
-  status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+  status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
   if (status) hook_restore(g, oldh);
   return status;
 }
@@ -1057,12 +1060,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 {
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+  TValue *top = L->top;
   fn->c.f = func;
-  setfuncV(L, L->top, fn);
-  setlightudV(L->top+1, checklightudptr(L, ud));
+  setfuncV(L, top++, fn);
+  if (LJ_FR2) setnilV(top++);
+  setlightudV(top++, checklightudptr(L, ud));
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
-  L->top += 2;
-  return L->top-1;  /* Now call the newly allocated C function. */
+  L->top = top;
+  return top-1;  /* Now call the newly allocated C function. */
 }
 
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1079,10 +1084,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 {
   if (luaL_getmetafield(L, idx, field)) {
-    TValue *base = L->top--;
-    copyTV(L, base, index2adr(L, idx));
-    L->top = base+1;
-    lj_vm_call(L, base, 1+1);
+    TValue *top = L->top--;
+    if (LJ_FR2) setnilV(top++);
+    copyTV(L, top++, index2adr(L, idx));
+    L->top = top;
+    lj_vm_call(L, top-1, 1+1);
     return 1;
   }
   return 0;
@@ -1109,12 +1115,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
     } else {  /* Yield from hook: add a pseudo-frame. */
       TValue *top = L->top;
       hook_leave(g);
-      top->u64 = cframe_multres(cf);
-      setcont(top+1, lj_cont_hook);
-      setframe_pc(top+1, cframe_pc(cf)-1);
-      setframe_gc(top+2, obj2gco(L));
-      setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
-      L->top = L->base = top+3;
+      (top++)->u64 = cframe_multres(cf);
+      setcont(top, lj_cont_hook);
+      if (LJ_FR2) top++;
+      setframe_pc(top, cframe_pc(cf)-1);
+      if (LJ_FR2) top++;
+      setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+      setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+      L->top = L->base = top+1;
 #if LJ_TARGET_X64
       lj_err_throw(L, LUA_YIELD);
 #else
@@ -1131,7 +1139,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
 LUA_API int lua_resume(lua_State *L, int nargs)
 {
   if (L->cframe == NULL && L->status <= LUA_YIELD)
-    return lj_vm_resume(L, L->top - nargs, 0, 0);
+    return lj_vm_resume(L,
+      L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
+      0, 0);
   L->top = L->base;
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   incr_top(L);
@@ -1161,10 +1171,10 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.total & 0x3ff);
     break;
   case LUA_GCSTEP: {
-    MSize a = (MSize)data << 10;
+    GCSize a = (GCSize)data << 10;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     while (g->gc.total >= g->gc.threshold)
-      if (lj_gc_step(L)) {
+      if (lj_gc_step(L) > 0) {
 	res = 1;
 	break;
       }
@@ -1178,6 +1188,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.stepmul);
     g->gc.stepmul = (MSize)data;
     break;
+  case LUA_GCISRUNNING:
+    res = (g->gc.threshold != LJ_MAX_MEM);
+    break;
   default:
     res = -1;  /* Invalid option. */
   }

+ 146 - 49
jni/LuaJIT-2.0.1/src/lj_arch.h → jni/LuaJIT-2.1/src/lj_arch.h

@@ -1,6 +1,6 @@
 /*
 ** Target architecture selection.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ARCH_H
@@ -19,10 +19,10 @@
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_arm		3
-#define LUAJIT_ARCH_PPC		4
-#define LUAJIT_ARCH_ppc		4
-#define LUAJIT_ARCH_PPCSPE	5
-#define LUAJIT_ARCH_ppcspe	5
+#define LUAJIT_ARCH_ARM64	4
+#define LUAJIT_ARCH_arm64	4
+#define LUAJIT_ARCH_PPC		5
+#define LUAJIT_ARCH_ppc		5
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_mips	6
 
@@ -43,12 +43,10 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET	LUAJIT_ARCH_PPCSPE
-#else
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
-#endif
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
 #else
@@ -66,8 +64,9 @@
 #define LUAJIT_OS	LUAJIT_OS_LINUX
 #elif defined(__MACH__) && defined(__APPLE__)
 #define LUAJIT_OS	LUAJIT_OS_OSX
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
-      defined(__NetBSD__) || defined(__OpenBSD__)
+#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+       defined(__NetBSD__) || defined(__OpenBSD__) || \
+       defined(__DragonFly__)) && !defined(__ORBIS__)
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
 #define LUAJIT_OS	LUAJIT_OS_POSIX
@@ -95,7 +94,7 @@
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS		(LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_IOS		(LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 
@@ -104,11 +103,29 @@
 #define LJ_TARGET_CONSOLE	1
 #endif
 
+#ifdef __ORBIS__
+#define LJ_TARGET_PS4		1
+#define LJ_TARGET_CONSOLE	1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
+#ifdef __psp2__
+#define LJ_TARGET_PSVITA	1
+#define LJ_TARGET_CONSOLE	1
+#endif
+
 #if _XBOX_VER >= 200
 #define LJ_TARGET_XBOX360	1
 #define LJ_TARGET_CONSOLE	1
 #endif
 
+#ifdef _DURANGO
+#define LJ_TARGET_XBOXONE	1
+#define LJ_TARGET_CONSOLE	1
+#define LJ_TARGET_GC64		1
+#endif
+
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
@@ -138,7 +155,11 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#define LJ_ABI_WIN		LJ_TARGET_WINDOWS
+#if LJ_TARGET_WINDOWS || __CYGWIN__
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
+#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
@@ -147,6 +168,9 @@
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+#ifdef LUAJIT_ENABLE_GC64
+#define LJ_TARGET_GC64		1
+#endif
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 
@@ -168,7 +192,9 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
-#if __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__
+#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#define LJ_ARCH_VERSION		80
+#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
 #elif __ARM_ARCH_6T2__
 #define LJ_ARCH_VERSION		61
@@ -178,15 +204,45 @@
 #define LJ_ARCH_VERSION		50
 #endif
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_NAME		"arm64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#define LJ_TARGET_ARM64		1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+#define LJ_ARCH_NOJIT		1	/* NYI */
+
+#define LJ_ARCH_VERSION		80
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
-#define LJ_ARCH_NAME		"ppc"
+#ifndef LJ_ARCH_ENDIAN
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+#endif
+
 #if _LP64
 #define LJ_ARCH_BITS		64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME		"ppc64le"
+#else
+#define LJ_ARCH_NAME		"ppc64"
+#endif
 #else
 #define LJ_ARCH_BITS		32
+#define LJ_ARCH_NAME		"ppc"
 #endif
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
+
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
@@ -195,6 +251,15 @@
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64	1
+#define LJ_ARCH_NOFFI		1
+#elif LJ_ARCH_BITS == 64
+#define LJ_ARCH_PPC64		1
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NOJIT		1	/* NYI */
+#endif
+
 #if _ARCH_PWR7
 #define LJ_ARCH_VERSION		70
 #elif _ARCH_PWR6
@@ -208,10 +273,6 @@
 #else
 #define LJ_ARCH_VERSION		0
 #endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64		1
-#define LJ_ARCH_NOFFI		1
-#endif
 #if _ARCH_PPCSQ
 #define LJ_ARCH_SQRT		1
 #endif
@@ -225,25 +286,6 @@
 #define LJ_ARCH_XENON		1
 #endif
 
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#define LJ_ARCH_NAME		"ppcspe"
-#define LJ_ARCH_BITS		32
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP		1
-#endif
-#define LJ_ABI_EABI		1
-#define LJ_TARGET_PPCSPE	1
-#define LJ_TARGET_EHRETREG	3
-#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT	0
-#define LJ_TARGET_MASKROT	1
-#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI		1	/* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT		1
-
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -290,6 +332,16 @@
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #error "Need at least GCC 4.2 or newer"
 #endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
 #elif !LJ_TARGET_PS3
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
@@ -313,20 +365,33 @@
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+#if defined(__AARCH64EB__)
+#error "No support for big-endian ARM64"
+#endif
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
+#endif
+#elif LJ_TARGET_PPC
 #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
 #error "No support for PowerPC CPUs without double-precision FPU"
 #endif
-#if defined(_LITTLE_ENDIAN)
-#error "No support for little-endian PowerPC"
+#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
+#error "No support for little-endian PPC32"
 #endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#if LJ_ARCH_PPC64
+#error "No support for PowerPC 64 bit mode (yet)"
+#endif
+#ifdef __NO_FPRS__
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
 #endif
 #elif LJ_TARGET_MIPS
 #if defined(__mips_soft_float)
 #error "No support for MIPS CPUs without FPU"
 #endif
+#if defined(_LP64)
+#error "No support for MIPS64"
+#endif
 #endif
 #endif
 
@@ -351,8 +416,22 @@
 #endif
 #endif
 
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64			1
+#else
+#define LJ_GC64			0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2			1
+#else
+#define LJ_FR2			0
+#endif
+
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
 #define LJ_HASJIT		0
 #else
 #define LJ_HASJIT		1
@@ -365,6 +444,21 @@
 #define LJ_HASFFI		1
 #endif
 
+#if defined(LUAJIT_DISABLE_PROFILE)
+#define LJ_HASPROFILE		0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_SIGPROF	1
+#elif LJ_TARGET_PS3
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_PTHREAD	1
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_WTHREAD	1
+#else
+#define LJ_HASPROFILE		0
+#endif
+
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #endif
@@ -397,15 +491,18 @@
 #define LJ_TARGET_UNALIGNED	0
 #endif
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #endif
+#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
+#define LJ_NO_SYSTEM		1
+#endif
 
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3
+#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #endif
 

+ 471 - 79
jni/LuaJIT-2.0.1/src/lj_asm.c → jni/LuaJIT-2.1/src/lj_asm.c

@@ -1,6 +1,6 @@
 /*
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_asm_c
@@ -30,6 +30,10 @@
 #include "lj_vm.h"
 #include "lj_target.h"
 
+#ifdef LUA_USE_ASSERT
+#include <stdio.h>
+#endif
+
 /* -- Assembler state and common macros ----------------------------------- */
 
 /* Assembler state. */
@@ -38,6 +42,9 @@ typedef struct ASMState {
 
   MCode *mcp;		/* Current MCode pointer (grows down). */
   MCode *mclim;		/* Lower limit for MCode memory + red zone. */
+#ifdef LUA_USE_ASSERT
+  MCode *mcp_prev;	/* Red zone overflow check. */
+#endif
 
   IRIns *ir;		/* Copy of pointer to IR instructions/constants. */
   jit_State *J;		/* JIT compiler state. */
@@ -110,14 +117,28 @@ typedef struct ASMState {
 
 /* Sparse limit checks using a red zone before the actual limit. */
 #define MCLIM_REDZONE	64
-#define checkmclim(as) \
-  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as)
 
 static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
 {
   lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
 }
 
+static LJ_AINLINE void checkmclim(ASMState *as)
+{
+#ifdef LUA_USE_ASSERT
+  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
+    IRIns *ir = IR(as->curins+1);
+    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d  %02d %04d %04d\n", as->mcp,
+	    as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+    lua_assert(0);
+  }
+#endif
+  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
+#ifdef LUA_USE_ASSERT
+  as->mcp_prev = as->mcp;
+#endif
+}
+
 #ifdef RID_NUM_KREF
 #define ra_iskref(ref)		((ref) < RID_NUM_KREF)
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
@@ -158,6 +179,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -315,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -332,6 +359,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
+  lua_assert(ir >= as->ir + REF_TRUE);
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
@@ -672,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
 	emit_loadu64(as, dest, ir_kint64(ir)->u64);
 	return;
 #endif
-      } else {
+      } else if (ir->o != IR_KPRI) {
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
 	emit_loadi(as, dest, ir->i);
@@ -922,44 +950,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX);
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1044,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lua_assert(irt_isinteger(IR(irs->op1)->t));
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1181,6 +1424,7 @@ static void asm_phi_copyspill(ASMState *as)
 	if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
 	  emit_spstore(as, irl, r, sps_scale(irl->s));
 	  emit_spload(as, ir, r, sps_scale(ir->s));
+	  checkmclim(as);
 	}
       }
     }
@@ -1206,6 +1450,7 @@ static void asm_phi_copyspill(ASMState *as)
 	if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
 	  emit_spstore(as, irl, r, sps_scale(irl->s));
 	  emit_spload(as, ir, r, sps_scale(ir->s));
+	  checkmclim(as);
 	}
       }
     }
@@ -1223,16 +1468,18 @@ static void asm_phi_fixup(ASMState *as)
     Reg r = rset_picktop(work);
     IRRef lref = as->phireg[r];
     IRIns *ir = IR(lref);
-    /* Left PHI gained a spill slot before the loop? */
-    if (irt_ismarked(ir->t) && ra_hasspill(ir->s)) {
-      IRRef ren;
-      lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-      ren = tref_ref(lj_ir_emit(as->J));
-      as->ir = as->T->ir;  /* The IR may have been reallocated. */
-      IR(ren)->r = (uint8_t)r;
-      IR(ren)->s = SPS_NONE;
+    if (irt_ismarked(ir->t)) {
+      irt_clearmark(ir->t);
+      /* Left PHI gained a spill slot before the loop? */
+      if (ra_hasspill(ir->s)) {
+	IRRef ren;
+	lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
+	ren = tref_ref(lj_ir_emit(as->J));
+	as->ir = as->T->ir;  /* The IR may have been reallocated. */
+	IR(ren)->r = (uint8_t)r;
+	IR(ren)->s = SPS_NONE;
+      }
     }
-    irt_clearmark(ir->t);  /* Always clear marker. */
     rset_clear(work, r);
   }
 }
@@ -1313,6 +1560,129 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1347,6 +1717,11 @@ static void asm_head_side(ASMState *as)
   int pass3 = 0;
   IRRef i;
 
+  if (as->snapno && as->topslot > as->parent->topslot) {
+    /* Force snap #0 alloc to prevent register overwrite in stack check. */
+    as->snapno = 0;
+    asm_snap_alloc(as);
+  }
   allow = asm_head_side_base(as, irp, allow);
 
   /* Scan all parent SLOADs and collect register dependencies. */
@@ -1538,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
     mres = (int32_t)(snap->nslots - baseslot);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1646,7 +2021,7 @@ static void asm_setup_regsp(ASMState *as)
 	as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1691,10 +2066,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+	if (ir->op2 != REF_NIL && as->evenspill < 4)
+	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
 	as->modset = RSET_SCRATCH;
@@ -1703,21 +2088,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
 	as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+	as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-	ir->prev = REGSP_HINT(RID_XMM0);
 	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+	  as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+	break;
 #else
 	ir->prev = REGSP_HINT(RID_FPRET);
-	if (inloop)
-	  as->modset |= RSET_SCRATCH;
-#endif
 	continue;
+#endif
       }
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1730,26 +2120,25 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-	ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-	if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+	if (!(as->flags & JIT_F_SSE4_1)) {
+	  ir->prev = REGSP_HINT(RID_XMM0);
+	  if (inloop)
+	    as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	  continue;
+	}
+	break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+	if (as->evenspill < 4)  /* Leave room to call pow(). */
 	  as->evenspill = 4;
-#endif
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-	continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-	ir->prev = REGSP_HINT(RID_XMM0);
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-	continue;
       }
+#endif
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-	as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
@@ -1822,6 +2211,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
 
   do {
     as->mcp = as->mctop;
+#ifdef LUA_USE_ASSERT
+    as->mcp_prev = as->mcp;
+#endif
     as->curins = T->nins;
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));

+ 1 - 1
jni/LuaJIT-2.0.1/src/lj_asm.h → jni/LuaJIT-2.1/src/lj_asm.h

@@ -1,6 +1,6 @@
 /*
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ASM_H

+ 173 - 308
jni/LuaJIT-2.0.1/src/lj_asm_arm.h → jni/LuaJIT-2.1/src/lj_asm_arm.h

@@ -1,6 +1,6 @@
 /*
 ** ARM IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -91,6 +91,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
   *mxp++ = group*EXITSTUBS_PER_GROUP;
   for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
     *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
+  lj_mcode_sync(as->mcbot, mxp);
   lj_mcode_commitbot(as->J, mxp);
   as->mcbot = mxp;
   as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -337,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
@@ -452,18 +453,9 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   IRRef func;
   IRIns *irf;
@@ -489,9 +481,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   /* Need to force a spill on REF_BASE now to update the stack slot. */
   emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
   emit_setgl(as, base, jit_base);
@@ -520,13 +513,15 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 static void asm_tobit(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_FPR;
-  Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, allow);
   Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
   Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
 }
+#else
+#define asm_tobit(as, ir)	lua_assert(0)
 #endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
@@ -563,9 +558,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
       lua_assert(irt_isint(ir->t) && st == IRT_NUM);
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
-      Reg dest = ra_dest(as, ir, RSET_GPR);
       Reg left = ra_alloc1(as, lref, RSET_FPR);
       Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      Reg dest = ra_dest(as, ir, RSET_GPR);
       ARMIns ai;
       emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
       ai = irt_isint(ir->t) ?
@@ -599,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -687,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -712,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
     }
-    if ((ir+1)->o == IR_HIOP)
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
       type = ra_alloc1(as, ref+1, allow);
     else
       type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -720,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -958,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1104,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
@@ -1114,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1209,6 +1148,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
   } else
 #endif
   if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
 	       irt_isint(ir->t) || irt_isaddr(ir->t));
     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
@@ -1216,18 +1158,15 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, allow);
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       if (t == IRT_INT) {
-	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
 	emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
-	dest = tmp;
 	t = IRT_NUM;  /* Check for original type. */
       } else {
-	Reg tmp = ra_scratch(as, RSET_GPR);
 	emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
 	emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
-	dest = tmp;
 	t = IRT_INT;  /* Check for original type. */
       }
+      dest = tmp;
     }
     goto dotypecheck;
   }
@@ -1271,19 +1210,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1305,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1391,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+		RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+				   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#else
+#define asm_fpmath(as, ir)	lua_assert(0)
 #endif
 
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1458,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1502,7 +1441,7 @@ static void asm_intmul(ASMState *as, IRIns *ir)
   if (dest == left && left != right) { left = right; right = dest; }
   if (irt_isguard(ir->t)) {  /* IR_MULOV */
     if (!(as->flags & JIT_F_ARMV6) && dest == left)
-      tmp = left = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left));
     asm_guardcc(as, CC_NE);
     emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
     emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left);
@@ -1549,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
   asm_intmul(as, ir);
 }
 
+#define asm_addov(as, ir)	asm_add(as, ir)
+#define asm_subov(as, ir)	asm_sub(as, ir)
+#define asm_mulov(as, ir)	asm_mul(as, ir)
+
+#if LJ_SOFTFP
+#define asm_div(as, ir)		lua_assert(0)
+#define asm_pow(as, ir)		lua_assert(0)
+#define asm_abs(as, ir)		lua_assert(0)
+#define asm_atan2(as, ir)	lua_assert(0)
+#define asm_ldexp(as, ir)	lua_assert(0)
+#else
+#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 #if !LJ_SOFTFP
@@ -1560,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-		RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-				   (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)	asm_bitop(as, ir, ARMI_MVN)
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1611,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_band(as, ir)	asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)		asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1628,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)	lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   uint32_t kcmp = 0, kmov = 0;
@@ -1701,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Map of comparisons to flags. ORDER IR. */
@@ -1816,6 +1782,19 @@ notst:
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
+#if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
 {
@@ -1851,6 +1830,7 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
   }
   emit_n(as, ARMI_CMP^mhi, lefthi);
 }
+#endif
 
 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
 
@@ -1864,11 +1844,14 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
 #if LJ_SOFTFP
-    if (!irt_isint(ir->t))
+    if (!irt_isint(ir->t)) {
       asm_sfpcomp(as, ir-1);
-    else
+      return;
+    }
+#endif
+#if LJ_HASFFI
+    asm_int64comp(as, ir-1);
 #endif
-      asm_int64comp(as, ir-1);
     return;
 #if LJ_SOFTFP
   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
@@ -1885,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1933,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1962,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1970,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
 }
 
@@ -2079,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 {
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
   }
 }
@@ -2096,7 +2089,8 @@ static void asm_head_root_base(ASMState *as)
   IRIns *ir;
   asm_head_lreg(as);
   ir = IR(REF_BASE);
-  if (ra_hasreg(ir->r) && rset_test(as->modset, ir->r)) ra_spill(as, ir);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
   ra_destreg(as, ir, RID_BASE);
 }
 
@@ -2106,7 +2100,8 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   IRIns *ir;
   asm_head_lreg(as);
   ir = IR(REF_BASE);
-  if (ra_hasreg(ir->r) && rset_test(as->modset, ir->r)) ra_spill(as, ir);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
     rset_clear(allow, ra_dest(as, ir, allow));
   } else {
@@ -2154,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR:  asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
-  IRRef args[CCI_NARGS_MAX];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {

+ 118 - 259
jni/LuaJIT-2.0.1/src/lj_asm_mips.h → jni/LuaJIT-2.1/src/lj_asm_mips.h

@@ -1,6 +1,6 @@
 /*
 ** MIPS IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -71,6 +71,7 @@ static void asm_sparejump_setup(ASMState *as)
     memset(mxp+2, 0, MIPS_SPAREJUMP*8);
     mxp += MIPS_SPAREJUMP*2;
     lua_assert(mxp < as->mctop);
+    lj_mcode_sync(as->mcbot, mxp);
     lj_mcode_commitbot(as->J, mxp);
     as->mcbot = mxp;
     as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -225,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 16;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
@@ -282,6 +283,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       else
 	ofs += 4;
     }
+    checkmclim(as);
   }
 }
 
@@ -324,18 +326,9 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   IRRef func;
   IRIns *irf;
@@ -360,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -389,9 +372,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   asm_guard(as, MIPSI_BNE, RID_TMP,
@@ -517,28 +501,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[LJ_BE?0:1] = ir->op1;
-  args[LJ_BE?1:0] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -555,6 +517,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	   RID_SP, sps_scale(ir->s));
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -578,27 +542,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -628,7 +571,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
   RegSet allow = RSET_GPR;
   int destused = ra_used(ir);
@@ -654,37 +597,42 @@ static void asm_href(ASMState *as, IRIns *ir)
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 
-  /* Key not found in chain: load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (destused)
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  else
-    *--as->mcp = MIPSI_NOP;
   /* Follow hash chain until the end. */
-  emit_move(as, dest, tmp1);
+  emit_move(as, dest, tmp2);
   l_loop = --as->mcp;
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
   l_next = emit_label(as);
 
   /* Type and value comparison. */
+  if (merge == IR_EQ) {  /* Must match asm_guard(). */
+    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+    l_end = asm_exitstub_addr(as);
+  }
   if (irt_isnum(kt)) {
     emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
     emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
-	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
-    emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
-    emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
+    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
+    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
     emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
   } else {
     if (irt_ispri(kt)) {
-      emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
+      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
     } else {
-      emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
-      emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
-      emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
+      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
+      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
+      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
     }
   }
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
-  *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
+  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
 
   /* Load main position relative to tab->node into dest. */
   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -774,20 +722,6 @@ nolo:
     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  if (ir->r != RID_SINK) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-    IRRef args[3];
-    args[0] = ASMREF_L;     /* lua_State *L */
-    args[1] = ir->op1;      /* GCtab *t     */
-    args[2] = ASMREF_TMP1;  /* cTValue *key */
-    asm_setupresult(as, ir, ci);  /* TValue * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -916,7 +850,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -925,6 +859,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   IRType1 t = ir->t;
@@ -1000,7 +936,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
       if (irt_isint(t)) {
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
-	emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+	emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
 	dest = tmp;
 	t.irt = IRT_NUM;  /* Check for original type. */
       } else {
@@ -1040,19 +976,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1061,6 +993,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
@@ -1075,12 +1008,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; if (LJ_BE) ir++; else ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1150,23 +1095,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
   emit_fg(as, mi, dest, left);
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, MIPSI_SQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1212,6 +1150,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_div(as, ir)		asm_fparith(as, ir, MIPSI_DIV_D)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
@@ -1223,6 +1165,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, MIPSI_ABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir)
 {
   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1256,13 +1202,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
     emit_move(as, RID_TMP, dest == left ? left : right);
 }
 
+#define asm_addov(as, ir)	asm_arithov(as, ir)
+#define asm_subov(as, ir)	asm_arithov(as, ir)
+
 static void asm_mulov(ASMState *as, IRIns *ir)
 {
-#if LJ_DUALNUM
-#error "NYI: MULOV"
-#else
-  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused in single-number mode. */
-#endif
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+						 right), dest));
+  asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
+  emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
+  emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
+  emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+  emit_dst(as, MIPSI_MULT, 0, left, right);
 }
 
 #if LJ_HASFFI
@@ -1349,7 +1303,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 #endif
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irl = IR(ir->op1);
@@ -1363,7 +1317,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
   emit_dst(as, MIPSI_NOR, dest, left, right);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1399,6 +1353,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   emit_dst(as, mi, dest, left, right);
 }
 
+#define asm_band(as, ir)	asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
+#define asm_bor(as, ir)		asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1412,7 +1370,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   }
 }
 
-static void asm_bitror(ASMState *as, IRIns *ir)
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
+#define asm_brol(as, ir)	lua_assert(0)
+
+static void asm_bror(ASMState *as, IRIns *ir)
 {
   if ((as->flags & JIT_F_MIPS32R2)) {
     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1461,6 +1424,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 static void asm_comp(ASMState *as, IRIns *ir)
@@ -1498,7 +1464,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_compeq(ASMState *as, IRIns *ir)
+static void asm_equal(ASMState *as, IRIns *ir)
 {
   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
   right = (left >> 8); left &= 255;
@@ -1572,8 +1538,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, LJ_LE ? 4 : 0);
-      asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+      asm_xstore_(as, ir, LJ_LE ? 4 : 0);
+      asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
     }
     return;
   }
@@ -1597,6 +1563,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, MIPSI_LBU, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1621,7 +1598,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
 }
@@ -1721,7 +1698,7 @@ static void asm_head_root_base(ASMState *as)
   if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
       emit_move(as, r, RID_BASE);
@@ -1736,7 +1713,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
@@ -1769,131 +1746,13 @@ static void asm_tail_prep(ASMState *as)
   as->invmcp = as->loopref ? as->mcp : NULL;
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
-  case IR_BOR:  asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
-  case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
-  case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
-  case IR_BROL: lua_assert(0); break;
-  case IR_BROR: asm_bitror(as, ir); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, MIPSI_SQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir); break;
-  case IR_SUBOV: asm_arithov(as, ir); break;
-  case IR_MULOV: asm_mulov(as, ir); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
-  IRRef args[CCI_NARGS_MAX];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {

+ 119 - 269
jni/LuaJIT-2.0.1/src/lj_asm_ppc.h → jni/LuaJIT-2.1/src/lj_asm_ppc.h

@@ -1,6 +1,6 @@
 /*
 ** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -49,6 +49,8 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
 {
   ExitNo i;
   MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
   /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
   for (i = nexits-1; (int32_t)i >= 0; i--)
     *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
@@ -249,7 +251,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
@@ -286,6 +288,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       else
 	ofs += 4;
     }
+    checkmclim(as);
   }
   if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
     emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
@@ -320,26 +323,19 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
+#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
+#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   IRRef func;
   IRIns *irf;
@@ -349,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
   func = ir->op2; irf = IR(func);
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irref_isk(func)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(irf->i);
+    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     Reg freg = ra_alloc1(as, func, allow);
@@ -360,16 +356,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -377,9 +363,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
@@ -509,28 +496,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -549,6 +514,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -562,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     /* Otherwise use g->tmptv to hold the TValue. */
     RegSet allow = rset_exclude(RSET_GPR, dest);
     Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
     if (!irt_ispri(ir->t)) {
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
@@ -572,27 +539,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -769,20 +715,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -913,7 +845,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   IRIns *irb;
   if (ir->r == RID_SINK)
@@ -930,6 +862,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   IRType1 t = ir->t;
@@ -1080,19 +1014,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1101,6 +1031,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
@@ -1114,12 +1045,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir++;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1193,23 +1136,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
   emit_fb(as, pi, dest, left);
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+    asm_fpunary(as, ir, PPCI_FSQRT);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1309,6 +1243,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
@@ -1327,6 +1265,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest, left, right;
@@ -1342,6 +1284,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
 }
 
+#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
+#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
+#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)
+
 #if LJ_HASFFI
 static void asm_add64(ASMState *as, IRIns *ir)
 {
@@ -1421,7 +1367,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 #endif
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   PPCIns pi = PPCI_NOR;
@@ -1448,7 +1394,7 @@ nofuse:
   emit_asb(as, pi, dest, left, right);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irx;
@@ -1469,32 +1415,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-  if (irref_isk(ir->op2)) {
-    int32_t k = IR(ir->op2)->i;
-    Reg tmp = left;
-    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
-      if (!checku16(k)) {
-	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
-	if ((k & 0xffff) == 0) return;
-      }
-      emit_asi(as, pik, dest, left, k);
-      return;
-    }
-  }
-  /* May fail due to spills/restores above, but simplifies the logic. */
-  if (as->flagmcp == as->mcp) {
-    as->flagmcp = NULL;
-    as->mcp++;
-    pi |= PPCF_DOT;
-  }
-  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
-  emit_asb(as, pi, dest, left, right);
-}
-
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 {
@@ -1525,7 +1445,7 @@ nofuse:
   *--as->mcp = pi | PPCF_T(left);
 }
 
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   IRRef lref = ir->op1;
@@ -1580,6 +1500,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
 }
 
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+      if (!checku16(k)) {
+	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+	if ((k & 0xffff) == 0) return;
+      }
+      emit_asi(as, pik, dest, left, k);
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 {
   Reg dest, left;
@@ -1605,6 +1554,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
   }
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
+#define asm_brol(as, ir) \
+  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+#define asm_bror(as, ir)	lua_assert(0)
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
   if (irt_isnum(ir->t)) {
@@ -1635,6 +1592,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
@@ -1711,6 +1671,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1756,8 +1718,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, 0);
-      asm_xstore(as, ir-1, 4);
+      asm_xstore_(as, ir, 0);
+      asm_xstore_(as, ir-1, 4);
     }
     return;
   }
@@ -1781,6 +1743,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1802,7 +1775,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
 }
@@ -1900,7 +1873,7 @@ static void asm_head_root_base(ASMState *as)
   Reg r = ir->r;
   if (ra_hasreg(r)) {
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
       emit_mr(as, r, RID_BASE);
@@ -1914,7 +1887,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   Reg r = ir->r;
   if (ra_hasreg(r)) {
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
@@ -1963,136 +1936,13 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitand(as, ir); break;
-  case IR_BOR:  asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
-  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
-  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
-  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
-			     PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
-  case IR_BROR: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
-      asm_fpunary(as, ir, PPCI_FSQRT);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
-  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
-  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
-  IRRef args[CCI_NARGS_MAX];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)

+ 227 - 389
jni/LuaJIT-2.0.1/src/lj_asm_x86.h → jni/LuaJIT-2.1/src/lj_asm_x86.h

@@ -1,6 +1,6 @@
 /*
 ** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Guard handling ------------------------------------------------------ */
@@ -325,6 +325,14 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
     }
+  } else if (ir->o == IR_KINT64) {
+    RegSet avail = as->freeset & ~as->modset & RSET_GPR;
+    lua_assert(allow != RSET_EMPTY);
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
+      as->mrm.ofs = ptr2addr(ir_kint64(ir));
+      as->mrm.base = as->mrm.idx = RID_NONE;
+      return RID_MRM;
+    }
   } else if (mayfuse(as, ref)) {
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
@@ -361,7 +369,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   }
-  if (!(as->freeset & allow) &&
+  if (!(as->freeset & allow) && !irref_isk(ref) &&
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
     goto fusespill;
   return ra_allocref(as, ref, allow);
@@ -384,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -417,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -512,6 +520,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       }
       ofs += sizeof(intptr_t);
     }
+    checkmclim(as);
   }
 #if LJ_64 && !LJ_ABI_WIN
   if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
@@ -551,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       if (ra_hasreg(dest)) {
 	ra_free(as, dest);
 	ra_modified(as, dest);
-	emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
 		  dest, RID_ESP, ofs);
       }
       if ((ci->flags & CCI_CASTU64)) {
@@ -570,20 +579,11 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       lua_assert(!irt_ispri(ir->t));
       ra_destreg(as, ir, RID_RET);
     }
-  } else if (LJ_32 && irt_isfp(ir->t)) {
+  } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
     emit_x87op(as, XI_FPOP);  /* Pop unused result from x87 st0. */
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
@@ -608,7 +608,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 
 static void asm_callx(ASMState *as, IRIns *ir)
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   IRRef func;
   IRIns *irf;
@@ -643,9 +643,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
@@ -662,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -719,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -728,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -824,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-	      dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -853,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -896,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
 	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	   asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -917,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	    RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-	      RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -950,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
     emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -975,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -995,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
       tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1011,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
@@ -1168,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1262,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1275,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir)	asm_fxload(as, ir)
+#define asm_xload(as, ir)	asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1338,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir)	asm_fxstore(as, ir)
+#define asm_xstore(as, ir)	asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1376,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1442,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1460,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1491,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1542,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+	   (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
@@ -1628,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-	rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1688,53 +1626,31 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-	emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-	emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
 }
 
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
+  asm_x87load(as, ir->op1);
+  asm_x87load(as, ir->op2);
+}
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -1748,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1826,8 +1731,12 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   Reg dest, right;
   int32_t k = 0;
   if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
-    as->flagmcp = NULL;
-    as->mcp += (LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2;
+    MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
+    if ((p[1] & 15) < 14) {
+      if ((p[1] & 15) >= 12) p[1] -= 4;  /* L <->S, NL <-> NS */
+      as->flagmcp = NULL;
+      as->mcp = p;
+    }  /* else: cannot transform LE/NLE to cc without use of OF. */
   }
   right = IR(rref)->r;
   if (ra_hasreg(right)) {
@@ -1946,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1953,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)		asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1964,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)	asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)	asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)	asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)	asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1972,6 +1952,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
+#define asm_band(as, ir)	asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)		asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)	asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
   IRRef rref = ir->op2;
@@ -2011,6 +1995,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir)	asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2037,8 +2027,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2193,6 +2184,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2211,6 +2204,7 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
     lefthi = asm_fuseload(as, ir->op1, allow);
   } else {
     lefthi = ra_alloc1(as, ir->op1, allow);
+    rset_clear(allow, lefthi);
     righthi = asm_fuseload(as, ir->op2, allow);
     if (righthi == RID_MRM) {
       if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
@@ -2226,13 +2220,8 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
     leftlo = asm_fuseload(as, (ir-1)->op1, allow);
   } else {
     leftlo = ra_alloc1(as, (ir-1)->op1, allow);
+    rset_clear(allow, leftlo);
     rightlo = asm_fuseload(as, (ir-1)->op2, allow);
-    if (rightlo == RID_MRM) {
-      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
-      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
-    } else {
-      rset_clear(allow, rightlo);
-    }
   }
 
   /* All register allocations must be performed _before_ this point. */
@@ -2279,13 +2268,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-	asm_conv_fp_int64(as, ir);
-      else
-	asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2334,6 +2319,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2355,7 +2350,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	      ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
@@ -2484,7 +2479,7 @@ static void asm_head_root_base(ASMState *as)
   Reg r = ir->r;
   if (ra_hasreg(r)) {
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
       emit_rr(as, XO_MOV, r, RID_BASE);
@@ -2498,7 +2493,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   Reg r = ir->r;
   if (ra_hasreg(r)) {
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
@@ -2583,169 +2578,12 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR:  asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   int nslots;
   asm_collectargs(as, ir, ci, args);
   nslots = asm_count_call_slots(as, ci, args);

+ 1 - 1
jni/LuaJIT-2.0.1/src/lj_bc.c → jni/LuaJIT-2.1/src/lj_bc.c

@@ -1,6 +1,6 @@
 /*
 ** Bytecode instruction modes.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_bc_c

Some files were not shown because too many files changed in this diff