Browse Source

Updated LuaJIT to latest 2.1 version

Martin Felis 10 years ago
parent
commit
28d2d92645
100 changed files with 6877 additions and 3792 deletions
  1. BIN
      jni/LuaJIT-2.0.1/android/armeabi-v7a/libluajit.a
  2. BIN
      jni/LuaJIT-2.0.1/android/armeabi/libluajit.a
  3. 0 74
      jni/LuaJIT-2.0.1/src/lib_bit.c
  4. 0 481
      jni/LuaJIT-2.0.1/src/lj_lex.c
  5. 0 339
      jni/LuaJIT-2.0.1/src/lj_str.c
  6. 0 50
      jni/LuaJIT-2.0.1/src/lj_str.h
  7. 0 0
      jni/LuaJIT-2.1/Android.mk
  8. 1 1
      jni/LuaJIT-2.1/COPYRIGHT
  9. 21 11
      jni/LuaJIT-2.1/Makefile
  10. 3 3
      jni/LuaJIT-2.1/README
  11. BIN
      jni/LuaJIT-2.1/android/armeabi-v7a/libluajit.a
  12. BIN
      jni/LuaJIT-2.1/android/armeabi/libluajit.a
  13. 2 2
      jni/LuaJIT-2.1/buildandroid.sh
  14. 1 1
      jni/LuaJIT-2.1/doc/bluequad-print.css
  15. 1 1
      jni/LuaJIT-2.1/doc/bluequad.css
  16. 177 232
      jni/LuaJIT-2.1/doc/changes.html
  17. 5 3
      jni/LuaJIT-2.1/doc/contact.html
  18. 4 2
      jni/LuaJIT-2.1/doc/ext_c_api.html
  19. 4 2
      jni/LuaJIT-2.1/doc/ext_ffi.html
  20. 8 4
      jni/LuaJIT-2.1/doc/ext_ffi_api.html
  21. 27 7
      jni/LuaJIT-2.1/doc/ext_ffi_semantics.html
  22. 5 3
      jni/LuaJIT-2.1/doc/ext_ffi_tutorial.html
  23. 5 3
      jni/LuaJIT-2.1/doc/ext_jit.html
  24. 365 0
      jni/LuaJIT-2.1/doc/ext_profiler.html
  25. 53 5
      jni/LuaJIT-2.1/doc/extensions.html
  26. 4 2
      jni/LuaJIT-2.1/doc/faq.html
  27. 0 0
      jni/LuaJIT-2.1/doc/img/contact.png
  28. 92 42
      jni/LuaJIT-2.1/doc/install.html
  29. 13 5
      jni/LuaJIT-2.1/doc/luajit.html
  30. 6 3
      jni/LuaJIT-2.1/doc/running.html
  31. 6 13
      jni/LuaJIT-2.1/doc/status.html
  32. 3 2
      jni/LuaJIT-2.1/dynasm/dasm_arm.h
  33. 9 6
      jni/LuaJIT-2.1/dynasm/dasm_arm.lua
  34. 518 0
      jni/LuaJIT-2.1/dynasm/dasm_arm64.h
  35. 1166 0
      jni/LuaJIT-2.1/dynasm/dasm_arm64.lua
  36. 3 2
      jni/LuaJIT-2.1/dynasm/dasm_mips.h
  37. 4 4
      jni/LuaJIT-2.1/dynasm/dasm_mips.lua
  38. 13 5
      jni/LuaJIT-2.1/dynasm/dasm_ppc.h
  39. 687 17
      jni/LuaJIT-2.1/dynasm/dasm_ppc.lua
  40. 3 3
      jni/LuaJIT-2.1/dynasm/dasm_proto.h
  41. 1 1
      jni/LuaJIT-2.1/dynasm/dasm_x64.lua
  42. 9 3
      jni/LuaJIT-2.1/dynasm/dasm_x86.h
  43. 366 80
      jni/LuaJIT-2.1/dynasm/dasm_x86.lua
  44. 10 11
      jni/LuaJIT-2.1/dynasm/dynasm.lua
  45. 1 1
      jni/LuaJIT-2.1/etc/luajit.1
  46. 6 5
      jni/LuaJIT-2.1/etc/luajit.pc
  47. 1 1
      jni/LuaJIT-2.1/src/.gitignore
  48. 90 59
      jni/LuaJIT-2.1/src/Makefile
  49. 111 93
      jni/LuaJIT-2.1/src/Makefile.dep
  50. 0 0
      jni/LuaJIT-2.1/src/host/.gitignore
  51. 0 0
      jni/LuaJIT-2.1/src/host/README
  52. 18 10
      jni/LuaJIT-2.1/src/host/buildvm.c
  53. 2 1
      jni/LuaJIT-2.1/src/host/buildvm.h
  54. 44 12
      jni/LuaJIT-2.1/src/host/buildvm_asm.c
  55. 1 1
      jni/LuaJIT-2.1/src/host/buildvm_fold.c
  56. 61 2
      jni/LuaJIT-2.1/src/host/buildvm_lib.c
  57. 45 0
      jni/LuaJIT-2.1/src/host/buildvm_libbc.h
  58. 1 1
      jni/LuaJIT-2.1/src/host/buildvm_peobj.c
  59. 197 0
      jni/LuaJIT-2.1/src/host/genlibbc.lua
  60. 2 1
      jni/LuaJIT-2.1/src/host/genminilua.lua
  61. 1 0
      jni/LuaJIT-2.1/src/host/minilua.c
  62. 0 0
      jni/LuaJIT-2.1/src/jit/.gitignore
  63. 10 11
      jni/LuaJIT-2.1/src/jit/bc.lua
  64. 16 14
      jni/LuaJIT-2.1/src/jit/bcsave.lua
  65. 10 10
      jni/LuaJIT-2.1/src/jit/dis_arm.lua
  66. 16 16
      jni/LuaJIT-2.1/src/jit/dis_mips.lua
  67. 7 10
      jni/LuaJIT-2.1/src/jit/dis_mipsel.lua
  68. 10 10
      jni/LuaJIT-2.1/src/jit/dis_ppc.lua
  69. 7 10
      jni/LuaJIT-2.1/src/jit/dis_x64.lua
  70. 176 89
      jni/LuaJIT-2.1/src/jit/dis_x86.lua
  71. 21 14
      jni/LuaJIT-2.1/src/jit/dump.lua
  72. 310 0
      jni/LuaJIT-2.1/src/jit/p.lua
  73. 10 7
      jni/LuaJIT-2.1/src/jit/v.lua
  74. 45 0
      jni/LuaJIT-2.1/src/jit/zone.lua
  75. 0 0
      jni/LuaJIT-2.1/src/lauxlib.h
  76. 2 2
      jni/LuaJIT-2.1/src/lib_aux.c
  77. 34 53
      jni/LuaJIT-2.1/src/lib_base.c
  78. 180 0
      jni/LuaJIT-2.1/src/lib_bit.c
  79. 2 2
      jni/LuaJIT-2.1/src/lib_debug.c
  80. 37 15
      jni/LuaJIT-2.1/src/lib_ffi.c
  81. 1 1
      jni/LuaJIT-2.1/src/lib_init.c
  82. 30 24
      jni/LuaJIT-2.1/src/lib_io.c
  83. 131 27
      jni/LuaJIT-2.1/src/lib_jit.c
  84. 4 7
      jni/LuaJIT-2.1/src/lib_math.c
  85. 32 20
      jni/LuaJIT-2.1/src/lib_os.c
  86. 12 7
      jni/LuaJIT-2.1/src/lib_package.c
  87. 131 319
      jni/LuaJIT-2.1/src/lib_string.c
  88. 90 83
      jni/LuaJIT-2.1/src/lib_table.c
  89. 15 0
      jni/LuaJIT-2.1/src/lj.supp
  90. 30 13
      jni/LuaJIT-2.1/src/lj_alloc.c
  91. 0 0
      jni/LuaJIT-2.1/src/lj_alloc.h
  92. 76 63
      jni/LuaJIT-2.1/src/lj_api.c
  93. 146 49
      jni/LuaJIT-2.1/src/lj_arch.h
  94. 471 79
      jni/LuaJIT-2.1/src/lj_asm.c
  95. 1 1
      jni/LuaJIT-2.1/src/lj_asm.h
  96. 173 308
      jni/LuaJIT-2.1/src/lj_asm_arm.h
  97. 118 259
      jni/LuaJIT-2.1/src/lj_asm_mips.h
  98. 119 269
      jni/LuaJIT-2.1/src/lj_asm_ppc.h
  99. 227 389
      jni/LuaJIT-2.1/src/lj_asm_x86.h
  100. 1 1
      jni/LuaJIT-2.1/src/lj_bc.c

BIN
jni/LuaJIT-2.0.1/android/armeabi-v7a/libluajit.a


BIN
jni/LuaJIT-2.0.1/android/armeabi/libluajit.a


+ 0 - 74
jni/LuaJIT-2.0.1/src/lib_bit.c

@@ -1,74 +0,0 @@
-/*
-** Bit manipulation library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-*/
-
-#define lib_bit_c
-#define LUA_LIB
-
-#include "lua.h"
-#include "lauxlib.h"
-#include "lualib.h"
-
-#include "lj_obj.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#include "lj_lib.h"
-
-/* ------------------------------------------------------------------------ */
-
-#define LJLIB_MODULE_bit
-
-LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
-{
-  lj_lib_checknumber(L, 1);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
-
-LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
-{
-  lj_lib_checknumber(L, 1);
-  lj_lib_checkbit(L, 2);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
-LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
-LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
-LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
-
-LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
-{
-  int i = 0;
-  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
-LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
-
-/* ------------------------------------------------------------------------ */
-
-LJLIB_CF(bit_tohex)
-{
-  uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
-  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
-  const char *hexdigits = "0123456789abcdef";
-  char buf[8];
-  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
-  if (n > 8) n = 8;
-  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
-  lua_pushlstring(L, buf, (size_t)n);
-  return 1;
-}
-
-/* ------------------------------------------------------------------------ */
-
-#include "lj_libdef.h"
-
-LUALIB_API int luaopen_bit(lua_State *L)
-{
-  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
-  return 1;
-}
-

+ 0 - 481
jni/LuaJIT-2.0.1/src/lj_lex.c

@@ -1,481 +0,0 @@
-/*
-** Lexical analyzer.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-**
-** Major portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
-*/
-
-#define lj_lex_c
-#define LUA_CORE
-
-#include "lj_obj.h"
-#include "lj_gc.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#if LJ_HASFFI
-#include "lj_tab.h"
-#include "lj_ctype.h"
-#include "lj_cdata.h"
-#include "lualib.h"
-#endif
-#include "lj_state.h"
-#include "lj_lex.h"
-#include "lj_parse.h"
-#include "lj_char.h"
-#include "lj_strscan.h"
-
-/* Lua lexer token names. */
-static const char *const tokennames[] = {
-#define TKSTR1(name)		#name,
-#define TKSTR2(name, sym)	#sym,
-TKDEF(TKSTR1, TKSTR2)
-#undef TKSTR1
-#undef TKSTR2
-  NULL
-};
-
-/* -- Buffer handling ----------------------------------------------------- */
-
-#define char2int(c)		((int)(uint8_t)(c))
-#define next(ls) \
-  (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
-#define save_and_next(ls)	(save(ls, ls->current), next(ls))
-#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
-#define END_OF_STREAM		(-1)
-
-static int fillbuf(LexState *ls)
-{
-  size_t sz;
-  const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
-  if (buf == NULL || sz == 0) return END_OF_STREAM;
-  ls->n = (MSize)sz - 1;
-  ls->p = buf;
-  return char2int(*(ls->p++));
-}
-
-static LJ_NOINLINE void save_grow(LexState *ls, int c)
-{
-  MSize newsize;
-  if (ls->sb.sz >= LJ_MAX_STR/2)
-    lj_lex_error(ls, 0, LJ_ERR_XELEM);
-  newsize = ls->sb.sz * 2;
-  lj_str_resizebuf(ls->L, &ls->sb, newsize);
-  ls->sb.buf[ls->sb.n++] = (char)c;
-}
-
-static LJ_AINLINE void save(LexState *ls, int c)
-{
-  if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz))
-    save_grow(ls, c);
-  else
-    ls->sb.buf[ls->sb.n++] = (char)c;
-}
-
-static void inclinenumber(LexState *ls)
-{
-  int old = ls->current;
-  lua_assert(currIsNewline(ls));
-  next(ls);  /* skip `\n' or `\r' */
-  if (currIsNewline(ls) && ls->current != old)
-    next(ls);  /* skip `\n\r' or `\r\n' */
-  if (++ls->linenumber >= LJ_MAX_LINE)
-    lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
-}
-
-/* -- Scanner for terminals ----------------------------------------------- */
-
-/* Parse a number literal. */
-static void lex_number(LexState *ls, TValue *tv)
-{
-  StrScanFmt fmt;
-  int c, xp = 'e';
-  lua_assert(lj_char_isdigit(ls->current));
-  if ((c = ls->current) == '0') {
-    save_and_next(ls);
-    if ((ls->current | 0x20) == 'x') xp = 'p';
-  }
-  while (lj_char_isident(ls->current) || ls->current == '.' ||
-	 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) {
-    c = ls->current;
-    save_and_next(ls);
-  }
-  save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
-	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
-	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
-  if (LJ_DUALNUM && fmt == STRSCAN_INT) {
-    setitype(tv, LJ_TISNUM);
-  } else if (fmt == STRSCAN_NUM) {
-    /* Already in correct format. */
-#if LJ_HASFFI
-  } else if (fmt != STRSCAN_ERROR) {
-    lua_State *L = ls->L;
-    GCcdata *cd;
-    lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
-    if (fmt == STRSCAN_IMAG) {
-      cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
-      ((double *)cdataptr(cd))[0] = 0;
-      ((double *)cdataptr(cd))[1] = numV(tv);
-    } else {
-      cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
-      *(uint64_t *)cdataptr(cd) = tv->u64;
-    }
-    lj_parse_keepcdata(ls, tv, cd);
-#endif
-  } else {
-    lua_assert(fmt == STRSCAN_ERROR);
-    lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
-  }
-}
-
-static int skip_sep(LexState *ls)
-{
-  int count = 0;
-  int s = ls->current;
-  lua_assert(s == '[' || s == ']');
-  save_and_next(ls);
-  while (ls->current == '=') {
-    save_and_next(ls);
-    count++;
-  }
-  return (ls->current == s) ? count : (-count) - 1;
-}
-
-static void read_long_string(LexState *ls, TValue *tv, int sep)
-{
-  save_and_next(ls);  /* skip 2nd `[' */
-  if (currIsNewline(ls))  /* string starts with a newline? */
-    inclinenumber(ls);  /* skip it */
-  for (;;) {
-    switch (ls->current) {
-    case END_OF_STREAM:
-      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
-      break;
-    case ']':
-      if (skip_sep(ls) == sep) {
-	save_and_next(ls);  /* skip 2nd `]' */
-	goto endloop;
-      }
-      break;
-    case '\n':
-    case '\r':
-      save(ls, '\n');
-      inclinenumber(ls);
-      if (!tv) lj_str_resetbuf(&ls->sb);  /* avoid wasting space */
-      break;
-    default:
-      if (tv) save_and_next(ls);
-      else next(ls);
-      break;
-    }
-  } endloop:
-  if (tv) {
-    GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
-				      ls->sb.n - 2*(2 + (MSize)sep));
-    setstrV(ls->L, tv, str);
-  }
-}
-
-static void read_string(LexState *ls, int delim, TValue *tv)
-{
-  save_and_next(ls);
-  while (ls->current != delim) {
-    switch (ls->current) {
-    case END_OF_STREAM:
-      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
-      continue;
-    case '\n':
-    case '\r':
-      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
-      continue;
-    case '\\': {
-      int c = next(ls);  /* Skip the '\\'. */
-      switch (c) {
-      case 'a': c = '\a'; break;
-      case 'b': c = '\b'; break;
-      case 'f': c = '\f'; break;
-      case 'n': c = '\n'; break;
-      case 'r': c = '\r'; break;
-      case 't': c = '\t'; break;
-      case 'v': c = '\v'; break;
-      case 'x':  /* Hexadecimal escape '\xXX'. */
-	c = (next(ls) & 15u) << 4;
-	if (!lj_char_isdigit(ls->current)) {
-	  if (!lj_char_isxdigit(ls->current)) goto err_xesc;
-	  c += 9 << 4;
-	}
-	c += (next(ls) & 15u);
-	if (!lj_char_isdigit(ls->current)) {
-	  if (!lj_char_isxdigit(ls->current)) goto err_xesc;
-	  c += 9;
-	}
-	break;
-      case 'z':  /* Skip whitespace. */
-	next(ls);
-	while (lj_char_isspace(ls->current))
-	  if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
-	continue;
-      case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
-      case '\\': case '\"': case '\'': break;
-      case END_OF_STREAM: continue;
-      default:
-	if (!lj_char_isdigit(c))
-	  goto err_xesc;
-	c -= '0';  /* Decimal escape '\ddd'. */
-	if (lj_char_isdigit(next(ls))) {
-	  c = c*10 + (ls->current - '0');
-	  if (lj_char_isdigit(next(ls))) {
-	    c = c*10 + (ls->current - '0');
-	    if (c > 255) {
-	    err_xesc:
-	      lj_lex_error(ls, TK_string, LJ_ERR_XESC);
-	    }
-	    next(ls);
-	  }
-	}
-	save(ls, c);
-	continue;
-      }
-      save(ls, c);
-      next(ls);
-      continue;
-      }
-    default:
-      save_and_next(ls);
-      break;
-    }
-  }
-  save_and_next(ls);  /* skip delimiter */
-  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
-}
-
-/* -- Main lexical scanner ------------------------------------------------ */
-
-static int llex(LexState *ls, TValue *tv)
-{
-  lj_str_resetbuf(&ls->sb);
-  for (;;) {
-    if (lj_char_isident(ls->current)) {
-      GCstr *s;
-      if (lj_char_isdigit(ls->current)) {  /* Numeric literal. */
-	lex_number(ls, tv);
-	return TK_number;
-      }
-      /* Identifier or reserved word. */
-      do {
-	save_and_next(ls);
-      } while (lj_char_isident(ls->current));
-      s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
-      setstrV(ls->L, tv, s);
-      if (s->reserved > 0)  /* Reserved word? */
-	return TK_OFS + s->reserved;
-      return TK_name;
-    }
-    switch (ls->current) {
-    case '\n':
-    case '\r':
-      inclinenumber(ls);
-      continue;
-    case ' ':
-    case '\t':
-    case '\v':
-    case '\f':
-      next(ls);
-      continue;
-    case '-':
-      next(ls);
-      if (ls->current != '-') return '-';
-      /* else is a comment */
-      next(ls);
-      if (ls->current == '[') {
-	int sep = skip_sep(ls);
-	lj_str_resetbuf(&ls->sb);  /* `skip_sep' may dirty the buffer */
-	if (sep >= 0) {
-	  read_long_string(ls, NULL, sep);  /* long comment */
-	  lj_str_resetbuf(&ls->sb);
-	  continue;
-	}
-      }
-      /* else short comment */
-      while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
-	next(ls);
-      continue;
-    case '[': {
-      int sep = skip_sep(ls);
-      if (sep >= 0) {
-	read_long_string(ls, tv, sep);
-	return TK_string;
-      } else if (sep == -1) {
-	return '[';
-      } else {
-	lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
-	continue;
-      }
-      }
-    case '=':
-      next(ls);
-      if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
-    case '<':
-      next(ls);
-      if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
-    case '>':
-      next(ls);
-      if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
-    case '~':
-      next(ls);
-      if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
-    case ':':
-      next(ls);
-      if (ls->current != ':') return ':'; else { next(ls); return TK_label; }
-    case '"':
-    case '\'':
-      read_string(ls, ls->current, tv);
-      return TK_string;
-    case '.':
-      save_and_next(ls);
-      if (ls->current == '.') {
-	next(ls);
-	if (ls->current == '.') {
-	  next(ls);
-	  return TK_dots;   /* ... */
-	}
-	return TK_concat;   /* .. */
-      } else if (!lj_char_isdigit(ls->current)) {
-	return '.';
-      } else {
-	lex_number(ls, tv);
-	return TK_number;
-      }
-    case END_OF_STREAM:
-      return TK_eof;
-    default: {
-      int c = ls->current;
-      next(ls);
-      return c;  /* Single-char tokens (+ - / ...). */
-    }
-    }
-  }
-}
-
-/* -- Lexer API ----------------------------------------------------------- */
-
-/* Setup lexer state. */
-int lj_lex_setup(lua_State *L, LexState *ls)
-{
-  int header = 0;
-  ls->L = L;
-  ls->fs = NULL;
-  ls->n = 0;
-  ls->p = NULL;
-  ls->vstack = NULL;
-  ls->sizevstack = 0;
-  ls->vtop = 0;
-  ls->bcstack = NULL;
-  ls->sizebcstack = 0;
-  ls->lookahead = TK_eof;  /* No look-ahead token. */
-  ls->linenumber = 1;
-  ls->lastline = 1;
-  lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
-  next(ls);  /* Read-ahead first char. */
-  if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
-      char2int(ls->p[1]) == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
-    ls->n -= 2;
-    ls->p += 2;
-    next(ls);
-    header = 1;
-  }
-  if (ls->current == '#') {  /* Skip POSIX #! header line. */
-    do {
-      next(ls);
-      if (ls->current == END_OF_STREAM) return 0;
-    } while (!currIsNewline(ls));
-    inclinenumber(ls);
-    header = 1;
-  }
-  if (ls->current == LUA_SIGNATURE[0]) {  /* Bytecode dump. */
-    if (header) {
-      /*
-      ** Loading bytecode with an extra header is disabled for security
-      ** reasons. This may circumvent the usual check for bytecode vs.
-      ** Lua code by looking at the first char. Since this is a potential
-      ** security violation no attempt is made to echo the chunkname either.
-      */
-      setstrV(L, L->top++, lj_err_str(L, LJ_ERR_BCBAD));
-      lj_err_throw(L, LUA_ERRSYNTAX);
-    }
-    return 1;
-  }
-  return 0;
-}
-
-/* Cleanup lexer state. */
-void lj_lex_cleanup(lua_State *L, LexState *ls)
-{
-  global_State *g = G(L);
-  lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
-  lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
-  lj_str_freebuf(g, &ls->sb);
-}
-
-void lj_lex_next(LexState *ls)
-{
-  ls->lastline = ls->linenumber;
-  if (LJ_LIKELY(ls->lookahead == TK_eof)) {  /* No lookahead token? */
-    ls->token = llex(ls, &ls->tokenval);  /* Get next token. */
-  } else {  /* Otherwise return lookahead token. */
-    ls->token = ls->lookahead;
-    ls->lookahead = TK_eof;
-    ls->tokenval = ls->lookaheadval;
-  }
-}
-
-LexToken lj_lex_lookahead(LexState *ls)
-{
-  lua_assert(ls->lookahead == TK_eof);
-  ls->lookahead = llex(ls, &ls->lookaheadval);
-  return ls->lookahead;
-}
-
-const char *lj_lex_token2str(LexState *ls, LexToken token)
-{
-  if (token > TK_OFS)
-    return tokennames[token-TK_OFS-1];
-  else if (!lj_char_iscntrl(token))
-    return lj_str_pushf(ls->L, "%c", token);
-  else
-    return lj_str_pushf(ls->L, "char(%d)", token);
-}
-
-void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
-{
-  const char *tok;
-  va_list argp;
-  if (token == 0) {
-    tok = NULL;
-  } else if (token == TK_name || token == TK_string || token == TK_number) {
-    save(ls, '\0');
-    tok = ls->sb.buf;
-  } else {
-    tok = lj_lex_token2str(ls, token);
-  }
-  va_start(argp, em);
-  lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp);
-  va_end(argp);
-}
-
-void lj_lex_init(lua_State *L)
-{
-  uint32_t i;
-  for (i = 0; i < TK_RESERVED; i++) {
-    GCstr *s = lj_str_newz(L, tokennames[i]);
-    fixstring(s);  /* Reserved words are never collected. */
-    s->reserved = (uint8_t)(i+1);
-  }
-}
-

+ 0 - 339
jni/LuaJIT-2.0.1/src/lj_str.c

@@ -1,339 +0,0 @@
-/*
-** String handling.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-**
-** Portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
-*/
-
-#include <stdio.h>
-
-#define lj_str_c
-#define LUA_CORE
-
-#include "lj_obj.h"
-#include "lj_gc.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#include "lj_state.h"
-#include "lj_char.h"
-
-/* -- String interning ---------------------------------------------------- */
-
-/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
-int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
-{
-  MSize i, n = a->len > b->len ? b->len : a->len;
-  for (i = 0; i < n; i += 4) {
-    /* Note: innocuous access up to end of string + 3. */
-    uint32_t va = *(const uint32_t *)(strdata(a)+i);
-    uint32_t vb = *(const uint32_t *)(strdata(b)+i);
-    if (va != vb) {
-#if LJ_LE
-      va = lj_bswap(va); vb = lj_bswap(vb);
-#endif
-      i -= n;
-      if ((int32_t)i >= -3) {
-	va >>= 32+(i<<3); vb >>= 32+(i<<3);
-	if (va == vb) break;
-      }
-      return va < vb ? -1 : 1;
-    }
-  }
-  return (int32_t)(a->len - b->len);
-}
-
-/* Fast string data comparison. Caveat: unaligned access to 1st string! */
-static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
-{
-  MSize i = 0;
-  lua_assert(len > 0);
-  lua_assert((((uintptr_t)a + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
-  do {  /* Note: innocuous access up to end of string + 3. */
-    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
-    if (v) {
-      i -= len;
-#if LJ_LE
-      return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
-#else
-      return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
-#endif
-    }
-    i += 4;
-  } while (i < len);
-  return 0;
-}
-
-/* Resize the string hash table (grow and shrink). */
-void lj_str_resize(lua_State *L, MSize newmask)
-{
-  global_State *g = G(L);
-  GCRef *newhash;
-  MSize i;
-  if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
-    return;  /* No resizing during GC traversal or if already too big. */
-  newhash = lj_mem_newvec(L, newmask+1, GCRef);
-  memset(newhash, 0, (newmask+1)*sizeof(GCRef));
-  for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
-    GCobj *p = gcref(g->strhash[i]);
-    while (p) {  /* Follow each hash chain and reinsert all strings. */
-      MSize h = gco2str(p)->hash & newmask;
-      GCobj *next = gcnext(p);
-      /* NOBARRIER: The string table is a GC root. */
-      setgcrefr(p->gch.nextgc, newhash[h]);
-      setgcref(newhash[h], p);
-      p = next;
-    }
-  }
-  lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
-  g->strmask = newmask;
-  g->strhash = newhash;
-}
-
-/* Intern a string and return string object. */
-GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
-{
-  global_State *g;
-  GCstr *s;
-  GCobj *o;
-  MSize len = (MSize)lenx;
-  MSize a, b, h = len;
-  if (lenx >= LJ_MAX_STR)
-    lj_err_msg(L, LJ_ERR_STROV);
-  g = G(L);
-  /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
-  if (len >= 4) {  /* Caveat: unaligned access! */
-    a = lj_getu32(str);
-    h ^= lj_getu32(str+len-4);
-    b = lj_getu32(str+(len>>1)-2);
-    h ^= b; h -= lj_rol(b, 14);
-    b += lj_getu32(str+(len>>2)-1);
-  } else if (len > 0) {
-    a = *(const uint8_t *)str;
-    h ^= *(const uint8_t *)(str+len-1);
-    b = *(const uint8_t *)(str+(len>>1));
-    h ^= b; h -= lj_rol(b, 14);
-  } else {
-    return &g->strempty;
-  }
-  a ^= h; a -= lj_rol(h, 11);
-  b ^= a; b -= lj_rol(a, 25);
-  h ^= b; h -= lj_rol(b, 16);
-  /* Check if the string has already been interned. */
-  o = gcref(g->strhash[h & g->strmask]);
-  if (LJ_LIKELY((((uintptr_t)str + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
-    while (o != NULL) {
-      GCstr *sx = gco2str(o);
-      if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
-	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
-	if (isdead(g, o)) flipwhite(o);
-	return sx;  /* Return existing string. */
-      }
-      o = gcnext(o);
-    }
-  } else {  /* Slow path: end of string is too close to a page boundary. */
-    while (o != NULL) {
-      GCstr *sx = gco2str(o);
-      if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
-	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
-	if (isdead(g, o)) flipwhite(o);
-	return sx;  /* Return existing string. */
-      }
-      o = gcnext(o);
-    }
-  }
-  /* Nope, create a new string. */
-  s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
-  newwhite(g, s);
-  s->gct = ~LJ_TSTR;
-  s->len = len;
-  s->hash = h;
-  s->reserved = 0;
-  memcpy(strdatawr(s), str, len);
-  strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
-  /* Add it to string hash table. */
-  h &= g->strmask;
-  s->nextgc = g->strhash[h];
-  /* NOBARRIER: The string table is a GC root. */
-  setgcref(g->strhash[h], obj2gco(s));
-  if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
-    lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
-  return s;  /* Return newly interned string. */
-}
-
-void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
-{
-  g->strnum--;
-  lj_mem_free(g, s, sizestring(s));
-}
-
-/* -- Type conversions ---------------------------------------------------- */
-
-/* Print number to buffer. Canonicalizes non-finite values. */
-size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
-{
-  if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) {  /* Finite? */
-    lua_Number n = o->n;
-#if __BIONIC__
-    if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
-#endif
-    return (size_t)lua_number2str(s, n);
-  } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
-    s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
-  } else if ((o->u32.hi & 0x80000000) == 0) {
-    s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
-  } else {
-    s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
-  }
-}
-
-/* Print integer to buffer. Returns pointer to start. */
-char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
-{
-  uint32_t u = (uint32_t)(k < 0 ? -k : k);
-  p += 1+10;
-  do { *--p = (char)('0' + u % 10); } while (u /= 10);
-  if (k < 0) *--p = '-';
-  return p;
-}
-
-/* Convert number to string. */
-GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
-{
-  char buf[LJ_STR_NUMBUF];
-  size_t len = lj_str_bufnum(buf, (TValue *)np);
-  return lj_str_new(L, buf, len);
-}
-
-/* Convert integer to string. */
-GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
-{
-  char s[1+10];
-  char *p = lj_str_bufint(s, k);
-  return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
-}
-
-GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
-{
-  return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
-}
-
-/* -- String formatting --------------------------------------------------- */
-
-static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
-{
-  char *p;
-  MSize i;
-  if (sb->n + len > sb->sz) {
-    MSize sz = sb->sz * 2;
-    while (sb->n + len > sz) sz = sz * 2;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  p = sb->buf + sb->n;
-  sb->n += len;
-  for (i = 0; i < len; i++) p[i] = str[i];
-}
-
-static void addchar(lua_State *L, SBuf *sb, int c)
-{
-  if (sb->n + 1 > sb->sz) {
-    MSize sz = sb->sz * 2;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  sb->buf[sb->n++] = (char)c;
-}
-
-/* Push formatted message as a string object to Lua stack. va_list variant. */
-const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
-{
-  SBuf *sb = &G(L)->tmpbuf;
-  lj_str_needbuf(L, sb, (MSize)strlen(fmt));
-  lj_str_resetbuf(sb);
-  for (;;) {
-    const char *e = strchr(fmt, '%');
-    if (e == NULL) break;
-    addstr(L, sb, fmt, (MSize)(e-fmt));
-    /* This function only handles %s, %c, %d, %f and %p formats. */
-    switch (e[1]) {
-    case 's': {
-      const char *s = va_arg(argp, char *);
-      if (s == NULL) s = "(null)";
-      addstr(L, sb, s, (MSize)strlen(s));
-      break;
-      }
-    case 'c':
-      addchar(L, sb, va_arg(argp, int));
-      break;
-    case 'd': {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
-      addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
-      break;
-      }
-    case 'f': {
-      char buf[LJ_STR_NUMBUF];
-      TValue tv;
-      MSize len;
-      tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
-      len = (MSize)lj_str_bufnum(buf, &tv);
-      addstr(L, sb, buf, len);
-      break;
-      }
-    case 'p': {
-#define FMTP_CHARS	(2*sizeof(ptrdiff_t))
-      char buf[2+FMTP_CHARS];
-      ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
-      ptrdiff_t i, lasti = 2+FMTP_CHARS;
-      if (p == 0) {
-	addstr(L, sb, "NULL", 4);
-	break;
-      }
-#if LJ_64
-      /* Shorten output for 64 bit pointers. */
-      lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
-#endif
-      buf[0] = '0';
-      buf[1] = 'x';
-      for (i = lasti-1; i >= 2; i--, p >>= 4)
-	buf[i] = "0123456789abcdef"[(p & 15)];
-      addstr(L, sb, buf, (MSize)lasti);
-      break;
-      }
-    case '%':
-      addchar(L, sb, '%');
-      break;
-    default:
-      addchar(L, sb, '%');
-      addchar(L, sb, e[1]);
-      break;
-    }
-    fmt = e+2;
-  }
-  addstr(L, sb, fmt, (MSize)strlen(fmt));
-  setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
-  incr_top(L);
-  return strVdata(L->top - 1);
-}
-
-/* Push formatted message as a string object to Lua stack. Vararg variant. */
-const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-{
-  const char *msg;
-  va_list argp;
-  va_start(argp, fmt);
-  msg = lj_str_pushvf(L, fmt, argp);
-  va_end(argp);
-  return msg;
-}
-
-/* -- Buffer handling ----------------------------------------------------- */
-
-char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
-{
-  if (sz > sb->sz) {
-    if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  return sb->buf;
-}
-

+ 0 - 50
jni/LuaJIT-2.0.1/src/lj_str.h

@@ -1,50 +0,0 @@
-/*
-** String handling.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
-*/
-
-#ifndef _LJ_STR_H
-#define _LJ_STR_H
-
-#include <stdarg.h>
-
-#include "lj_obj.h"
-
-/* String interning. */
-LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
-LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
-LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
-LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
-
-#define lj_str_newz(L, s)	(lj_str_new(L, s, strlen(s)))
-#define lj_str_newlit(L, s)	(lj_str_new(L, "" s, sizeof(s)-1))
-
-/* Type conversions. */
-LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
-LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
-LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
-
-#define LJ_STR_INTBUF		(1+10)
-#define LJ_STR_NUMBUF		LUAI_MAXNUMBER2STR
-
-/* String formatting. */
-LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
-LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-#if defined(__GNUC__)
-  __attribute__ ((format (printf, 2, 3)))
-#endif
-  ;
-
-/* Resizable string buffers. Struct definition in lj_obj.h. */
-LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
-
-#define lj_str_initbuf(sb)	((sb)->buf = NULL, (sb)->sz = 0)
-#define lj_str_resetbuf(sb)	((sb)->n = 0)
-#define lj_str_resizebuf(L, sb, size) \
-  ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
-   (sb)->sz = (size))
-#define lj_str_freebuf(g, sb)	lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
-
-#endif

+ 0 - 0
jni/LuaJIT-2.0.1/Android.mk → jni/LuaJIT-2.1/Android.mk


+ 1 - 1
jni/LuaJIT-2.0.1/COPYRIGHT → jni/LuaJIT-2.1/COPYRIGHT

@@ -1,7 +1,7 @@
 ===============================================================================
 ===============================================================================
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 
 
-Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 of this software and associated documentation files (the "Software"), to deal

+ 21 - 11
jni/LuaJIT-2.0.1/Makefile → jni/LuaJIT-2.1/Makefile

@@ -10,13 +10,14 @@
 # For MSVC, please follow the instructions given in src/msvcbuild.bat.
 # For MSVC, please follow the instructions given in src/msvcbuild.bat.
 # For MinGW and Cygwin, cd to src and run make with the Makefile there.
 # For MinGW and Cygwin, cd to src and run make with the Makefile there.
 #
 #
-# Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 ##############################################################################
 
 
 MAJVER=  2
 MAJVER=  2
-MINVER=  0
-RELVER=  1
-VERSION= $(MAJVER).$(MINVER).$(RELVER)
+MINVER=  1
+RELVER=  0
+PREREL=  -beta1
+VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
 ABIVER=  5.1
 ABIVER=  5.1
 
 
 ##############################################################################
 ##############################################################################
@@ -25,11 +26,12 @@ ABIVER=  5.1
 # the paths in src/luaconf.h, too. Note: PREFIX must be an absolute path!
 # the paths in src/luaconf.h, too. Note: PREFIX must be an absolute path!
 #
 #
 export PREFIX= /usr/local
 export PREFIX= /usr/local
+export MULTILIB= lib
 ##############################################################################
 ##############################################################################
 
 
 DPREFIX= $(DESTDIR)$(PREFIX)
 DPREFIX= $(DESTDIR)$(PREFIX)
 INSTALL_BIN=   $(DPREFIX)/bin
 INSTALL_BIN=   $(DPREFIX)/bin
-INSTALL_LIB=   $(DPREFIX)/lib
+INSTALL_LIB=   $(DPREFIX)/$(MULTILIB)
 INSTALL_SHARE= $(DPREFIX)/share
 INSTALL_SHARE= $(DPREFIX)/share
 INSTALL_INC=   $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER)
 INSTALL_INC=   $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER)
 
 
@@ -73,7 +75,8 @@ INSTALL_X= install -m 0755
 INSTALL_F= install -m 0644
 INSTALL_F= install -m 0644
 UNINSTALL= $(RM)
 UNINSTALL= $(RM)
 LDCONFIG= ldconfig -n
 LDCONFIG= ldconfig -n
-SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|"
+SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
+            -e "s|^multilib=.*|multilib=$(MULTILIB)|"
 
 
 FILE_T= luajit
 FILE_T= luajit
 FILE_A= libluajit.a
 FILE_A= libluajit.a
@@ -81,8 +84,9 @@ FILE_SO= libluajit.so
 FILE_MAN= luajit.1
 FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
-FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \
-	      dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua
+FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+	      dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \
+	      dis_mips.lua dis_mipsel.lua vmdef.lua
 
 
 ifeq (,$(findstring Windows,$(OS)))
 ifeq (,$(findstring Windows,$(OS)))
   ifeq (Darwin,$(shell uname -s))
   ifeq (Darwin,$(shell uname -s))
@@ -107,7 +111,7 @@ install: $(INSTALL_DEP)
 	$(MKDIR) $(INSTALL_DIRS)
 	$(MKDIR) $(INSTALL_DIRS)
 	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
 	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
 	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
 	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
-	$(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+	$(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
 	cd src && test -f $(FILE_SO) && \
 	cd src && test -f $(FILE_SO) && \
 	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
 	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
 	  $(LDCONFIG) $(INSTALL_LIB) && \
 	  $(LDCONFIG) $(INSTALL_LIB) && \
@@ -119,12 +123,18 @@ install: $(INSTALL_DEP)
 	  $(RM) $(FILE_PC).tmp
 	  $(RM) $(FILE_PC).tmp
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
-	$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+	@echo ""
+	@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
+	@echo "You can do this now by running this command (with sudo):"
+	@echo ""
+	@echo "  $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
+	@echo ""
+
 
 
 uninstall:
 uninstall:
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
-	$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
 	for file in $(FILES_JITLIB); do \
 	for file in $(FILES_JITLIB); do \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  done
 	  done

+ 3 - 3
jni/LuaJIT-2.0.1/README → jni/LuaJIT-2.1/README

@@ -1,11 +1,11 @@
-README for LuaJIT 2.0.1
------------------------
+README for LuaJIT 2.1.0-beta1
+-----------------------------
 
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
 
 Project Homepage: http://luajit.org/
 Project Homepage: http://luajit.org/
 
 
-LuaJIT is Copyright (C) 2005-2013 Mike Pall.
+LuaJIT is Copyright (C) 2005-2015 Mike Pall.
 LuaJIT is free software, released under the MIT license.
 LuaJIT is free software, released under the MIT license.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 
 

BIN
jni/LuaJIT-2.1/android/armeabi-v7a/libluajit.a


BIN
jni/LuaJIT-2.1/android/armeabi/libluajit.a


+ 2 - 2
jni/LuaJIT-2.0.1/buildandroid.sh → jni/LuaJIT-2.1/buildandroid.sh

@@ -26,8 +26,8 @@ if [[ x$NDK = "x" ]]; then
     exit 1
     exit 1
 fi
 fi
 
 
-NDKABI=8
-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
+NDKABI=9
+NDKVER=$NDK/toolchains/arm-linux-androideabi-4.8
 NDKP=$NDKVER/prebuilt/${host_os}-${host_arch}/bin/arm-linux-androideabi-
 NDKP=$NDKVER/prebuilt/${host_os}-${host_arch}/bin/arm-linux-androideabi-
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
 CFLAGS=""
 CFLAGS=""

+ 1 - 1
jni/LuaJIT-2.0.1/doc/bluequad-print.css → jni/LuaJIT-2.1/doc/bluequad-print.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2013 Mike Pall.
+/* Copyright (C) 2004-2015 Mike Pall.
  *
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 1 - 1
jni/LuaJIT-2.0.1/doc/bluequad.css → jni/LuaJIT-2.1/doc/bluequad.css

@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2013 Mike Pall.
+/* Copyright (C) 2004-2015 Mike Pall.
  *
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.
  * But please do not steal the stylesheet, the layout or the color scheme.

+ 177 - 232
jni/LuaJIT-2.0.1/doc/changes.html → jni/LuaJIT-2.1/doc/changes.html

@@ -4,7 +4,7 @@
 <title>LuaJIT Change History</title>
 <title>LuaJIT Change History</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +44,8 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -63,7 +65,7 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <div id="main">
 <div id="main">
 <p>
 <p>
 This is a list of changes between the released versions of LuaJIT.<br>
 This is a list of changes between the released versions of LuaJIT.<br>
-The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.1</strong>.<br>
+The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;2.0.4</strong>.<br>
 </p>
 </p>
 <p>
 <p>
 Please check the
 Please check the
@@ -72,6 +74,178 @@ to see whether newer versions are available.
 </p>
 </p>
 
 
 <div class="major" style="background: #d0d0ff;">
 <div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 &mdash; 2015-08-25</h2>
+<p>
+This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
+Please take a look at the commit history for more details.
+</p>
+<ul>
+<li>Changes to the VM core:
+<ul>
+<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
+<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li>
+<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li>
+<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li>
+<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li>
+<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
+</ul></li>
+<li>Improvements to the JIT compiler:
+<ul>
+<li>Add trace stitching.</li>
+<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
+<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
+<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
+<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
+<li>Compile <tt>__concat</tt> metamethod.</li>
+<li>Various minor optimizations.</li>
+</ul></li>
+<li>Internal Changes:
+<ul>
+<li>Add support for embedding LuaJIT bytecode for builtins.</li>
+<li>Replace various builtins with embedded bytecode.</li>
+<li>Refactor string buffers and string formatting.</li>
+<li>Remove obsolete non-truncating number to integer conversions.</li>
+</ul></li>
+<li>Ports:
+<ul>
+<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
+<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li>
+<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li>
+<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
+<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
+<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
+<li>PPC/e500: Drop support for this architecture.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>FFI: Add 64 bit bitwise operations.</li>
+<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li>
+<li>FFI: Compile conversions from functions to function pointers.</li>
+<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li>
+<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
+<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
+<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
+</ul></li>
+</ul>
+</div>
+
+<div class="major" style="background: #ffffd0;">
+<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 &mdash; 2015-05-14</h2>
+<ul>
+<li>Fix stack check in narrowing optimization.</li>
+<li>Fix Lua/C API typecheck error for special indexes.</li>
+<li>Fix string to number conversion.</li>
+<li>Fix lexer error for chunks without tokens.</li>
+<li>Don't compile <tt>IR_RETF</tt> after <tt>CALLT</tt> to ff with side effects.</li>
+<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization in Lua parser.</li>
+<li>Fix corner case in string to number conversion.</li>
+<li>Gracefully handle <tt>lua_error()</tt> for a suspended coroutine.</li>
+<li>Avoid error messages when building with Clang.</li>
+<li>Fix snapshot #0 handling for traces with a stack check on entry.</li>
+<li>Fix fused constant loads under high register pressure.</li>
+<li>Invalidate backpropagation cache after DCE.</li>
+<li>Fix ABC elimination.</li>
+<li>Fix debug info for main chunk of stripped bytecode.</li>
+<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li>
+<li>Fix FOLD rule for <tt>STRREF</tt> of <tt>SNEW</tt>.</li>
+<li>Fix frame traversal while searching for error function.</li>
+<li>Prevent GC estimate miscalculation due to buffer growth.</li>
+<li>Prevent adding side traces for stack checks.</li>
+<li>Fix top slot calculation for snapshots with continuations.</li>
+<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li>
+<li>Add PS Vita port.</li>
+<li>Fix compatibility issues with Illumos.</li>
+<li>Fix DragonFly build (unsupported).</li>
+<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li>
+<li>x86: Fix argument checks for <tt>ipairs()</tt> iterator.</li>
+<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX Clang.</li>
+<li>x86: Fix code generation for unused result of <tt>math.random()</tt>.</li>
+<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and <tt>LUAJIT_USE_VALGRIND</tt>.</li>
+<li>x86/x64: Fix argument check for bit shifts.</li>
+<li>x86/x64: Fix code generation for fused test/arith ops.</li>
+<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li>
+<li>PPC: Fix red zone overflow in machine code generation.</li>
+<li>PPC: Don't use <tt>mcrxr</tt> on PPE.</li>
+<li>Various archs: Fix excess stack growth in interpreter.</li>
+<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV num.u32</tt>.</li>
+<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li>
+<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li>
+<li>FFI: Fix initialization of unions of subtypes.</li>
+<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li>
+<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt> metamethod resolution for ctypes.</li>
+<li>FFI: Fix compilation of reference field access.</li>
+<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li>
+<li>FFI: Fix recording of indexing a struct pointer ctype object itself.</li>
+<li>FFI: Allow non-scalar cdata to be compared for equality by address.</li>
+<li>FFI: Fix pseudo type conversions for type punning.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 &mdash; 2014-03-12</h2>
+<ul>
+<li>Add PS4 port.</li>
+<li>Add support for multilib distro builds.</li>
+<li>Fix OSX build.</li>
+<li>Fix MinGW build.</li>
+<li>Fix Xbox 360 build.</li>
+<li>Improve ULOAD forwarding for open upvalues.</li>
+<li>Fix GC steps threshold handling when called by JIT-compiled code.</li>
+<li>Fix argument checks for <tt>math.deg()</tt> and <tt>math.rad()</tt>.</li>
+<li>Fix <tt>jit.flush(func|true)</tt>.</li>
+<li>Respect <tt>jit.off(func)</tt> when returning to a function, too.</li>
+<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li>
+<li>Fix line number for relocated bytecode after closure fixup.</li>
+<li>Fix frame traversal for backtraces.</li>
+<li>Fix ABC elimination.</li>
+<li>Fix handling of redundant PHIs.</li>
+<li>Fix snapshot restore for exit to function header.</li>
+<li>Fix type punning alias analysis for constified pointers.</li>
+<li>Fix call unroll checks in the presence of metamethod frames.</li>
+<li>Fix initial maxslot for down-recursive traces.</li>
+<li>Prevent BASE register coalescing if parent uses <tt>IR_RETF</tt>.</li>
+<li>Don't purge modified function from stack slots in <tt>BC_RET</tt>.</li>
+<li>Fix recording of <tt>BC_VARG</tt>.</li>
+<li>Don't access dangling reference to reallocated IR.</li>
+<li>Fix frame depth display for bytecode dump in <tt>-jdump</tt>.</li>
+<li>ARM: Fix register allocation when rematerializing FPRs.</li>
+<li>x64: Fix store to upvalue for lightuserdata values.</li>
+<li>FFI: Add missing GC steps for callback argument conversions.</li>
+<li>FFI: Properly unload loaded DLLs.</li>
+<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li>
+<li>FFI/x64: Fix passing of vector arguments to calls.</li>
+<li>FFI: Rehash finalizer table after GC cycle, if needed.</li>
+<li>FFI: Fix <tt>cts-&gt;L</tt> for cdata unsinking in snapshot restore.</li>
+</ul>
+
+<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 &mdash; 2013-06-03</h2>
+<ul>
+<li>Fix memory access check for fast string interning.</li>
+<li>Fix MSVC intrinsics for older versions.</li>
+<li>Add missing GC steps for <tt>io.*</tt> functions.</li>
+<li>Fix spurious red zone overflows in machine code generation.</li>
+<li>Fix jump-range constrained mcode allocation.</li>
+<li>Inhibit DSE for implicit loads via calls.</li>
+<li>Fix builtin string to number conversion for overflow digits.</li>
+<li>Fix optional argument handling while recording builtins.</li>
+<li>Fix optional argument handling in <tt>table.concat()</tt>.</li>
+<li>Add partial support for building with MinGW64 GCC 4.8-SEH.</li>
+<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt> FOLD rule.</li>
+<li>Fix compatibility issues with Illumos.</li>
+<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li>
+<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li>
+<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler code.</li>
+<li>FFI: Fix snapshot substitution in SPLIT pass.</li>
+<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li>
+<li>FFI: Fix tailcall in lowest frame to C&nbsp;function with bool result.</li>
+<li>FFI: Ignore <tt>long</tt> type specifier in <tt>ffi.istype()</tt>.</li>
+<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).</li>
+<li>FFI: Fix calling conventions for ARM hard-float EABI (nested structs).</li>
+<li>FFI: Improve error messages for arithmetic and comparison operators.</li>
+<li>FFI: Insert no-op type conversion for pointer to integer cast.</li>
+<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li>
+<li>FFI: Must sink <tt>XBAR</tt> together with <tt>XSTORE</tt>s.</li>
+<li>FFI: Preserve intermediate string for <tt>const&nbsp;char&nbsp;*</tt> conversion.</li>
+</ul>
+
 <h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 &mdash; 2013-02-19</h2>
 <h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 &mdash; 2013-02-19</h2>
 <ul>
 <ul>
 <li>Don't clear frame for out-of-memory error.</li>
 <li>Don't clear frame for out-of-memory error.</li>
@@ -619,240 +793,11 @@ This matches the behavior of Lua 5.1, but not the specification.</li>
 no point in listing differences over earlier versions.</li>
 no point in listing differences over earlier versions.</li>
 </ul>
 </ul>
 </div>
 </div>
-
-<div class="major" style="background: #ffff80;">
-<h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 &mdash; 2012-04-16</h2>
-<ul>
-<li>Merged with Lua 5.1.5. Also integrated fixes for all
-<a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">&raquo;</span>&nbsp;<span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.5</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 &mdash; 2011-05-05</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 &mdash; 2010-03-28</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-<li>Removed wrong GC check in <tt>jit_createstate()</tt>.
-Thanks to Tim Mensch.</li>
-<li>Fixed bad assertions while compiling <tt>table.insert()</tt> and
-<tt>table.remove()</tt>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
-<ul>
-<li>Merged with Lua 5.1.4. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
-<ul>
-<li>Merged with Lua 5.1.3. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
-<li>Fixed possible (but unlikely) stack corruption while compiling
-<tt>k^x</tt> expressions.</li>
-<li>Fixed DynASM template for cmpss instruction.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
-<ul>
-<li>Merged with Lua 5.1.2. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
-<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
-<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
-<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
-<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
-<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
-<li>The loadable debug modules now handle redirection to stdout
-(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
-<ul>
-<li>Fix MSVC inline assembly: use only local variables with
-<tt>lua_number2int()</tt>.</li>
-<li>Fix "attempt to call a thread value" bug on Mac OS X:
-make values of consts used as lightuserdata keys unique
-to avoid joining by the compiler/linker.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
-<ul>
-<li>Merged with Lua 5.1.1. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
-<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
-<li>Minor changes to DynASM: faster pre-processing, smaller encoding
-for some immediates.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.1 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-
-<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
-<ul>
-<li>Merged with Lua 5.1 (final).</li>
-
-<li>New JIT call frame setup:
-<ul>
-<li>The C stack is kept 16 byte aligned (faster).
-Mandatory for Mac OS X on Intel, too.</li>
-<li>Faster calling conventions for internal C helper functions.</li>
-<li>Better instruction scheduling for function prologue, OP_CALL and
-OP_RETURN.</li>
-</ul></li>
-
-<li>Miscellaneous optimizations:
-<ul>
-<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
-forwarding stalls.</li>
-<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
-and FP to integer conversions.</li>
-<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
-<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
-With better accuracy than the C variant, too.</li>
-<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
-use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
-</ul></li>
-
-<li>Changes in the optimizer:
-<ul>
-<li>Improved hinting for table keys derived from table values
-(<tt>t1[t2[x]]</tt>).</li>
-<li>Lookup hinting now works with arbitrary object types and
-supports index chains, too.</li>
-<li>Generate type hints for arithmetic and comparison operators,
-OP_LEN, OP_CONCAT and OP_FORPREP.</li>
-<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
-<li>Complete rewrite of <tt>jit.opt_inline</tt> module
-(ex <tt>jit.opt_lib</tt>).</li>
-</ul></li>
-
-<li>Use adaptive deoptimization:
-<ul>
-<li>If runtime verification of a contract fails, the affected
-instruction is recompiled and patched on-the-fly.
-Regular programs will trigger deoptimization only occasionally.</li>
-<li>This avoids generating code for uncommon fallback cases
-most of the time. Generated code is up to 30% smaller compared to
-LuaJIT&nbsp;1.0.3.</li>
-<li>Deoptimization is used for many opcodes and contracts:
-<ul>
-<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
-<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
-<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
-<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
-OP_FORPREP: operand type and range mismatches.</li>
-</ul></li>
-<li>Complete redesign of the debug and traceback info
-(bytecode &harr; mcode) to support deoptimization.
-Much more flexible and needs only 50% of the space.</li>
-<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
-<tt>jit.dump</tt> handle deoptimization.</li>
-</ul></li>
-
-<li>Inlined many popular library functions
-(for commonly used arguments only):
-<ul>
-<li>Most <tt>math.*</tt> functions (the 18 most used ones)
-[2x-10x faster].</li>
-<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
-[2x-10x faster].</li>
-<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
-[3x-5x faster].</li>
-<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
-[3x-5x faster].</li>
-<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
-[8x-15x faster].</li>
-</ul></li>
-
-<li>Changes in the core and loadable modules and the stand-alone executable:
-<ul>
-<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
-and <tt>jit.arch</tt>.</li>
-<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
-<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
-<li>New x86 disassembler module written in pure Lua. No dependency
-on ndisasm anymore. Flexible API, very compact (500 lines)
-and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
-<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
-on a separate line.</li>
-</ul></li>
-
-<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
-<li>Miscellaneous doc changes. Added a section about
-<a href="install.html#embedding">embedding LuaJIT</a>.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.0 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-</div>
-
-<div class="major" style="background: #ffffd0;">
-<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
-<ul>
-<li>Even more docs.</li>
-<li>Unified closure checks in <tt>jit.*</tt>.</li>
-<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
-<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
-<li>Merged with Lua 5.1 alpha (including early bug fixes).</li>
-</ul>
-<p>
-This is the first public release of LuaJIT.
-</p>
-
-<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
-<ul>
-<li>Add support for flushing the Valgrind translation cache <br>
-(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
-<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
-variant for POSIX systems.</li>
-<li>Reorganized the C&nbsp;function signature handling in
-<tt>jit.opt_lib</tt>.</li>
-<li>Changed to index-based hints for inlining C&nbsp;functions.
-Still no support in the backend for inlining.</li>
-<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
-<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
-<li>Misc. changes to the Makefiles.</li>
-<li>Lots of new docs.</li>
-<li>Complete doc reorg.</li>
-</ul>
-<p>
-Not released because Lua 5.1 alpha came out today.
-</p>
-
-<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
-<ul>
-<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
-<li>Fix result handling for C &ndash;> JIT calls.</li>
-<li>Detect CPU feature bits.</li>
-<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
-<li>Add fallback instructions for FP compares.</li>
-<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
-<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
-(David Burgess).</li>
-<li>Misc. doc updates.</li>
-</ul>
-<p>
-Interim non-public release.
-Special thanks to Adam D. Moss for reporting most of the bugs.
-</p>
-
-<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
-<p>
-This is the initial non-public release of LuaJIT.
-</p>
-</div>
 <br class="flush">
 <br class="flush">
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 5 - 3
jni/LuaJIT-2.0.1/doc/contact.html → jni/LuaJIT-2.1/doc/contact.html

@@ -4,7 +4,7 @@
 <title>Contact</title>
 <title>Contact</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -84,7 +86,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
 <h2>Copyright</h2>
 <h2>Copyright</h2>
 <p>
 <p>
 All documentation is
 All documentation is
-Copyright &copy; 2005-2013 Mike Pall.
+Copyright &copy; 2005-2015 Mike Pall.
 </p>
 </p>
 
 
 
 
@@ -92,7 +94,7 @@ Copyright &copy; 2005-2013 Mike Pall.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 4 - 2
jni/LuaJIT-2.0.1/doc/ext_c_api.html → jni/LuaJIT-2.1/doc/ext_c_api.html

@@ -4,7 +4,7 @@
 <title>Lua/C API Extensions</title>
 <title>Lua/C API Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -177,7 +179,7 @@ Also note that this mechanism is not without overhead.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 4 - 2
jni/LuaJIT-2.0.1/doc/ext_ffi.html → jni/LuaJIT-2.1/doc/ext_ffi.html

@@ -4,7 +4,7 @@
 <title>FFI Library</title>
 <title>FFI Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -320,7 +322,7 @@ without undue conversion penalties.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 8 - 4
jni/LuaJIT-2.0.1/doc/ext_ffi_api.html → jni/LuaJIT-2.1/doc/ext_ffi_api.html

@@ -4,7 +4,7 @@
 <title>ffi.* API Functions</title>
 <title>ffi.* API Functions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -46,6 +46,8 @@ td.abiparam { font-weight: bold; width: 6em; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -466,6 +468,8 @@ otherwise. The following parameters are currently defined:
 <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
 <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
 <tr class="odd">
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+<tr class="even">
+<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 </table>
 
 
 <h3 id="ffi_os"><tt>ffi.os</tt></h3>
 <h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -542,8 +546,8 @@ corresponding ctype.
 The parser for Lua source code treats numeric literals with the
 The parser for Lua source code treats numeric literals with the
 suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
 suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
 integers. Case doesn't matter, but uppercase is recommended for
 integers. Case doesn't matter, but uppercase is recommended for
-readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
-(<tt>0x2aLL</tt>) literals.
+readability. It handles decimal (<tt>42LL</tt>), hexadecimal
+(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
 </p>
 </p>
 <p>
 <p>
 The imaginary part of complex numbers can be specified by suffixing
 The imaginary part of complex numbers can be specified by suffixing
@@ -556,7 +560,7 @@ named <tt>i</tt>.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 27 - 7
jni/LuaJIT-2.0.1/doc/ext_ffi_semantics.html → jni/LuaJIT-2.1/doc/ext_ffi_semantics.html

@@ -4,7 +4,7 @@
 <title>FFI Semantics</title>
 <title>FFI Semantics</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -46,6 +46,8 @@ td.convop { font-style: italic; width: 40%; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -183,6 +185,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
 <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
 <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
 <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
 <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
 
 
+<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
+
 </ul>
 </ul>
 <p>
 <p>
 You're encouraged to use these types in preference to
 You're encouraged to use these types in preference to
@@ -730,6 +734,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
 number (e.g. for regular floating-point calculations) with
 number (e.g. for regular floating-point calculations) with
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 
 
+<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
+arithmetic operators apply analogously.<br>
+
+Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
+converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
+returns a Lua number.<br>
+
+For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
+conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
+<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number as a shift count for
+shifts and rotates is accepted, but that alone does <em>not</em> cause
+a cdata number output.</li>
+
 </ul>
 </ul>
 
 
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1188,7 +1208,9 @@ storing and initializing them are supported, yet.</li>
 <li>The <tt>volatile</tt> type qualifier is currently ignored by
 <li>The <tt>volatile</tt> type qualifier is currently ignored by
 compiled code.</li>
 compiled code.</li>
 <li><a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> silently
 <li><a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> silently
-ignores all re-declarations.</li>
+ignores most re-declarations. Note: avoid re-declarations which do not
+conform to C99. The implementation will eventually be changed to
+perform strict checks.</li>
 </ul>
 </ul>
 <p>
 <p>
 The JIT compiler already handles a large subset of all FFI operations.
 The JIT compiler already handles a large subset of all FFI operations.
@@ -1203,9 +1225,8 @@ suboptimal performance, especially when used in inner loops:
 <li>Vector operations.</li>
 <li>Vector operations.</li>
 <li>Table initializers.</li>
 <li>Table initializers.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
-<li>Allocations of variable-length arrays or structs.</li>
-<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an
-alignment &gt; 8&nbsp;bytes.</li>
+<li>Non-default initialization of VLA/VLS or large C&nbsp;types
+(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
 <li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
 <li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
 <li>Pointer differences for element sizes that are not a power of
 <li>Pointer differences for element sizes that are not a power of
 two.</li>
 two.</li>
@@ -1222,7 +1243,6 @@ value.</li>
 Other missing features:
 Other missing features:
 </p>
 </p>
 <ul>
 <ul>
-<li>Bit operations for 64&nbsp;bit types.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
@@ -1233,7 +1253,7 @@ compiled.</li>
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 5 - 3
jni/LuaJIT-2.0.1/doc/ext_ffi_tutorial.html → jni/LuaJIT-2.1/doc/ext_ffi_tutorial.html

@@ -4,7 +4,7 @@
 <title>FFI Tutorial</title>
 <title>FFI Tutorial</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -48,6 +48,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -564,7 +566,7 @@ Thus it's not helpful and actually counter-productive to cache
 individual C&nbsp;functions like this:
 individual C&nbsp;functions like this:
 </p>
 </p>
 <pre class="code">
 <pre class="code">
-local <b>funca</b>, <b>funcb</b> = ffi.C.funcb, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span>
+local <b>funca</b>, <b>funcb</b> = ffi.C.funca, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span>
 local function foo(x, n)
 local function foo(x, n)
   for i=1,n do <b>funcb</b>(<b>funca</b>(x, i), 1) end
   for i=1,n do <b>funcb</b>(<b>funca</b>(x, i), 1) end
 end
 end
@@ -591,7 +593,7 @@ it to a local variable in the function scope is unnecessary.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 5 - 3
jni/LuaJIT-2.0.1/doc/ext_jit.html → jni/LuaJIT-2.1/doc/ext_jit.html

@@ -4,7 +4,7 @@
 <title>jit.* Library</title>
 <title>jit.* Library</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -41,6 +41,8 @@
 <a class="current" href="ext_jit.html">jit.* Library</a>
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -151,7 +153,7 @@ Contains the target OS name:
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <p>
 <p>
 Contains the target architecture name:
 Contains the target architecture name:
-"x86", "x64" or "ppcspe".
+"x86", "x64", "arm", "ppc", or "mips".
 </p>
 </p>
 
 
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
@@ -189,7 +191,7 @@ if you want to know more.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 365 - 0
jni/LuaJIT-2.1/doc/ext_profiler.html

@@ -0,0 +1,365 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Profiler</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Profiler</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a class="current" href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has an integrated statistical profiler with very low overhead. It
+allows sampling the currently executing stack and other parameters in
+regular intervals.
+</p>
+<p>
+The integrated profiler can be accessed from three levels:
+</p>
+<ul>
+<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
+<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
+<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
+<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
+</ul>
+
+<h2 id="hl_profiler">High-Level Profiler</h2>
+<p>
+The bundled high-level profiler offers basic profiling functionality. It
+generates simple textual summaries or source code annotations. It can be
+accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
+or from Lua code by loading the underlying <tt>jit.p</tt> module.
+</p>
+<p>
+To cut to the chase &mdash; run this to get a CPU usage profile by
+function name:
+</p>
+<pre class="code">
+luajit -jp myapp.lua
+</pre>
+<p>
+It's <em>not</em> a stated goal of the bundled profiler to add every
+possible option or to cater for special profiling needs. The low-level
+profiler APIs are documented below. They may be used by third-party
+authors to implement advanced functionality, e.g. IDE integration or
+graphical profilers.
+</p>
+<p>
+Note: Sampling works for both interpreted and JIT-compiled code. The
+results for JIT-compiled code may sometimes be surprising. LuaJIT
+heavily optimizes and inlines Lua code &mdash; there's no simple
+one-to-one correspondence between source code lines and the sampled
+machine code.
+</p>
+
+<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
+<p>
+The <tt>-jp</tt> command line option starts the high-level profiler.
+When the application run by the command line terminates, the profiler
+stops and writes the results to <tt>stdout</tt> or to the specified
+<tt>output</tt> file.
+</p>
+<p>
+The <tt>options</tt> argument specifies how the profiling is to be
+performed:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
+This is the default mode.</li>
+<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
+<li><tt>&lt;number&gt;</tt> &mdash; Stack dump depth (callee &larr;
+caller). Default: 1.</li>
+<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
+&rarr; callee).</li>
+<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
+depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
+<li><tt>p</tt> &mdash; Show full path for module names.</li>
+<li><tt>v</tt> &mdash; Show VM states.</li>
+<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
+<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
+<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
+<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
+<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
+<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
+Default: 3%.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
+Default: 10ms.<br>
+Note: The actual sampling precision is OS-dependent.</li>
+</ul>
+<p>
+The default output for <tt>-jp</tt> is a list of the most CPU consuming
+spots in the application. Increasing the stack dump depth with (say)
+<tt>-jp=2</tt> may help to point out the main callers or callees of
+hotspots. But sample aggregation is still flat per unique stack dump.
+</p>
+<p>
+To get a two-level view (split view) of callers/callees, use
+<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
+level are relative to the first level.
+</p>
+<p>
+To see how much time is spent in each line relative to a function, use
+<tt>-jp=fl</tt>.
+</p>
+<p>
+To see how much time is spent in different VM states or
+<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
+</p>
+<p>
+Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
+views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
+spent in a VM state or zone vs. hotspots. This can be used to answer
+questions like "Which time consuming functions are only interpreted?" or
+"What's the garbage collector overhead for a specific function?".
+</p>
+<p>
+Multiple options can be combined &mdash; but not all combinations make
+sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
+deep in 4ms intervals and shows a split view of the CPU consuming
+functions and their callers with a 1% threshold.
+</p>
+<p>
+Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
+always flat and at the line level. Obviously, the source code files need
+to be readable by the profiler script.
+</p>
+<p>
+The high-level profiler can also be started and stopped from Lua code with:
+</p>
+<pre class="code">
+require("jit.p").start(options, output)
+...
+require("jit.p").stop()
+</pre>
+
+<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
+<p>
+Zones can be used to provide information about different parts of an
+application to the high-level profiler. E.g. a game could make use of an
+<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
+organized as a stack.
+</p>
+<p>
+The <tt>jit.zone</tt> module needs to be loaded explicitly:
+</p>
+<pre class="code">
+local zone = require("jit.zone")
+</pre>
+<ul>
+<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
+<li><tt>zone()</tt> pops the current zone from the zone stack and
+returns its name.</li>
+<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
+<li><tt>zone:flush()</tt> flushes the zone stack.</li>
+</ul>
+<p>
+To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
+spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
+</p>
+
+<h2 id="ll_lua_api">Low-level Lua API</h2>
+<p>
+The <tt>jit.profile</tt> module gives access to the low-level API of the
+profiler from Lua code. This module needs to be loaded explicitly:</p>
+<pre class="code">
+local profile = require("jit.profile")
+</pre>
+<p>
+This module can be used to implement your own higher-level profiler.
+A typical profiling run starts the profiler, captures stack dumps in
+the profiler callback, adds them to a hash table to aggregate the number
+of samples, stops the profiler and then analyzes all of the captured
+stack dumps. Other parameters can be sampled in the profiler callback,
+too. But it's important not to spend too much time in the callback,
+since this may skew the statistics.
+</p>
+
+<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. The <tt>mode</tt> argument is a
+string holding options:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
+<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
+10ms).<br>
+Note: The actual sampling precision is OS-dependent.
+</li>
+</ul>
+<p>
+The <tt>cb</tt> argument is a callback function which is called with
+three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
+called on a separate coroutine; the <tt>thread</tt> argument is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify the stack of that state or call functions on it.
+</p>
+<p>
+<tt>samples</tt> gives the number of accumulated samples since the last
+callback (usually 1).
+</p>
+<p>
+<tt>vmstate</tt> holds the VM state at the time the profiling timer
+triggered. This may or may not correspond to the state of the VM when
+the profiling callback is called. The state is either <tt>'N'</tt>
+native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
+C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
+compiler.
+</p>
+
+<h3 id="profile_stop"><tt>profile.stop()</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner. It
+returns a string with a stack dump for the <tt>thread</tt> (coroutine),
+formatted according to the <tt>fmt</tt> argument:
+</p>
+<ul>
+<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
+only the file name is used.</li>
+<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
+use module:line.</li>
+<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Dump module:line.</li>
+<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
+frame.</li>
+<li>All other characters are added verbatim to the output string.</li>
+</ul>
+<p>
+The <tt>depth</tt> argument gives the number of frames to dump, starting
+at the topmost frame of the thread. A negative number dumps the frames in
+inverse order.
+</p>
+<p>
+The first example prints a list of the current module names and line
+numbers of up to 10 frames in separate lines. The second example prints
+semicolon-separated function names for all frames (up to 100) in inverse
+order:
+</p>
+<pre class="code">
+print(profile.dumpstack(thread, "l\n", 10))
+print(profile.dumpstack(thread, "fZ;", -100))
+</pre>
+
+<h2 id="ll_c_api">Low-level C API</h2>
+<p>
+The profiler can be controlled directly from C&nbsp;code, e.g. for
+use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
+<a href="ext_c_api.html">Lua/C API</a> extensions).
+</p>
+
+<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. <a href="#profile_start">See
+above</a> for a description of the <tt>mode</tt> argument.
+</p>
+<p>
+The <tt>cb</tt> argument is a callback function with the following
+declaration:
+</p>
+<pre class="code">
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+                                        int samples, int vmstate);
+</pre>
+<p>
+<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify this stack or call functions on this stack &mdash;
+use a separate coroutine for this purpose. <a href="#profile_start">See
+above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
+</p>
+
+<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner.
+<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
+and <tt>depth</tt>.
+</p>
+<p>
+This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
+private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
+argument returns the length of the output string. The buffer is
+overwritten on the next call and deallocated when the profiler stops.
+You either need to consume the content immediately or copy it for later
+use.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>

+ 53 - 5
jni/LuaJIT-2.0.1/doc/extensions.html → jni/LuaJIT-2.1/doc/extensions.html

@@ -4,7 +4,7 @@
 <title>Extensions</title>
 <title>Extensions</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -58,6 +58,8 @@ td.excinterop {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -113,6 +115,9 @@ bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
 This module is a LuaJIT built-in &mdash; you don't need to download or
 This module is a LuaJIT built-in &mdash; you don't need to download or
 install Lua BitOp. The Lua BitOp site has full documentation for all
 install Lua BitOp. The Lua BitOp site has full documentation for all
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+The FFI adds support for
+<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
+using the same API functions.
 </p>
 </p>
 <p>
 <p>
 Please make sure to <tt>require</tt> the module before using any of
 Please make sure to <tt>require</tt> the module before using any of
@@ -146,6 +151,11 @@ LuaJIT adds some
 <a href="ext_c_api.html">extra functions to the Lua/C API</a>.
 <a href="ext_c_api.html">extra functions to the Lua/C API</a>.
 </p>
 </p>
 
 
+<h3 id="profiler">Profiler</h3>
+<p>
+LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
+</p>
+
 <h2 id="library">Enhanced Standard Library Functions</h2>
 <h2 id="library">Enhanced Standard Library Functions</h2>
 
 
 <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
 <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -173,7 +183,7 @@ in <tt>"-inf"</tt>.
 <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
 <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
 <p>
 <p>
 All string-to-number conversions consistently convert integer and
 All string-to-number conversions consistently convert integer and
-floating-point inputs in decimal and hexadecimal on all platforms.
+floating-point inputs in decimal, hexadecimal and binary on all platforms.
 <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
 <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
 problems with poor C library implementations. The builtin conversion
 problems with poor C library implementations. The builtin conversion
 function provides full precision according to the IEEE-754 standard, it
 function provides full precision according to the IEEE-754 standard, it
@@ -197,6 +207,36 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
 bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 </p>
 </p>
+<p>
+Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
+a different, incompatible bytecode format for ports that use this mode (e.g.
+ARM64). This may be rectified in the future.
+</p>
+
+<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
+<p>
+An extra library function <tt>table.new()</tt> can be made available via
+<tt>require("table.new")</tt>. This creates a pre-sized table, just like
+the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
+tables if the final table size is known and automatic table resizing is
+too expensive.
+</p>
+
+<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
+<p>
+An extra library function <tt>table.clear()</tt> can be made available
+via <tt>require("table.clear")</tt>. This clears all keys and values
+from a table, but preserves the allocated array/hash sizes. This is
+useful when a table, which is linked from multiple places, needs to be
+cleared and/or when recycling a table for use by the same context. This
+avoids managing backlinks, saves an allocation and the overhead of
+incremental array/hash part growth.
+</p>
+<p>
+Please note this function is meant for very specific situations. In most
+cases it's better to replace the (usually single) link with a new table
+and let the GC do its work.
+</p>
 
 
 <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
 <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
 <p>
 <p>
@@ -248,8 +288,9 @@ enabled:
 <ul>
 <ul>
 <li><tt>goto</tt> and <tt>::labels::</tt>.</li>
 <li><tt>goto</tt> and <tt>::labels::</tt>.</li>
 <li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
 <li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
-<li><tt>load(string|reader, chunkname [,mode [,env]])</tt>.
-<tt>loadstring()</tt> is an alias.</li>
+<li><tt>load(string|reader [,chunkname [,mode [,env]]])</tt>.</li>
+<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
+<li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
 <li><tt>math.log(x [,base])</tt>.
 <li><tt>math.log(x [,base])</tt>.
 <li><tt>string.rep(s, n [,sep])</tt>.
 <li><tt>string.rep(s, n [,sep])</tt>.
 <li><tt>string.format()</tt>: <tt>%q</tt> reversible.
 <li><tt>string.format()</tt>: <tt>%q</tt> reversible.
@@ -303,6 +344,13 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
 break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
 break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
 </p>
 </p>
 
 
+<h2 id="lua53">Extensions from Lua 5.3</h2>
+<p>
+LuaJIT supports some extensions from Lua&nbsp;5.3:
+</p>
+<ul>
+<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
+</ul>
+
 <h2 id="exceptions">C++ Exception Interoperability</h2>
 <h2 id="exceptions">C++ Exception Interoperability</h2>
 <p>
 <p>
 LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
 LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -397,7 +445,7 @@ lead to the termination of the process.</li>
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 4 - 2
jni/LuaJIT-2.0.1/doc/faq.html → jni/LuaJIT-2.1/doc/faq.html

@@ -4,7 +4,7 @@
 <title>Frequently Asked Questions (FAQ)</title>
 <title>Frequently Asked Questions (FAQ)</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +44,8 @@ dd { margin-left: 1.5em; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -174,7 +176,7 @@ the development of certain features, if they are important to you.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 0 - 0
jni/LuaJIT-2.0.1/doc/img/contact.png → jni/LuaJIT-2.1/doc/img/contact.png


+ 92 - 42
jni/LuaJIT-2.0.1/doc/install.html → jni/LuaJIT-2.1/doc/install.html

@@ -4,7 +4,7 @@
 <title>Installation</title>
 <title>Installation</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -69,6 +69,8 @@ td.compatno {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -112,38 +114,38 @@ operating systems, CPUs and compilers:
 </tr>
 </tr>
 <tr class="odd separate">
 <tr class="odd separate">
 <td class="compatcpu">x86 (32 bit)</td>
 <td class="compatcpu">x86 (32 bit)</td>
-<td class="compatos">GCC 4.x<br>GCC 3.4</td>
-<td class="compatos">GCC 4.x<br>GCC 3.4</td>
-<td class="compatos">GCC 4.x<br>GCC 3.4</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
 <td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
 <td class="compatcpu">x64 (64 bit)</td>
 <td class="compatcpu">x64 (64 bit)</td>
-<td class="compatos">GCC 4.x</td>
-<td class="compatos compatno">&nbsp;</td>
-<td class="compatos">GCC 4.x</td>
-<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0</td>
+<td class="compatos">GCC 4.2+</td>
+<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
+<td class="compatos">XCode 5.0+<br>Clang</td>
+<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
 </tr>
 </tr>
 <tr class="odd">
 <tr class="odd">
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
 <td class="compatos">GCC 4.2+</td>
 <td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+</td>
+<td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td>
+<td class="compatos">XCode 5.0+<br>Clang</td>
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
-<td class="compatcpu"><a href="#cross2">PPC</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
+<td class="compatcpu"><a href="#cross2">ARM64</a></td>
+<td class="compatos">GCC 4.8+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 </tr>
 <tr class="odd">
 <tr class="odd">
-<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>
-<td class="compatos">GCC 4.3+</td>
+<td class="compatcpu"><a href="#cross2">PPC</a></td>
 <td class="compatos">GCC 4.3+</td>
 <td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
 <td class="compatos compatno">&nbsp;</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 </tr>
 <tr class="even">
 <tr class="even">
 <td class="compatcpu"><a href="#cross2">MIPS</a></td>
 <td class="compatcpu"><a href="#cross2">MIPS</a></td>
@@ -188,8 +190,8 @@ open a terminal window and change to this directory. Now unpack the archive
 and change to the newly created directory:
 and change to the newly created directory:
 </p>
 </p>
 <pre class="code">
 <pre class="code">
-tar zxf LuaJIT-2.0.1.tar.gz
-cd LuaJIT-2.0.1</pre>
+tar zxf LuaJIT-2.0.4.tar.gz
+cd LuaJIT-2.0.4</pre>
 <h3>Building LuaJIT</h3>
 <h3>Building LuaJIT</h3>
 <p>
 <p>
 The supplied Makefiles try to auto-detect the settings needed for your
 The supplied Makefiles try to auto-detect the settings needed for your
@@ -383,10 +385,11 @@ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
 # ARM hard-float ABI with VFP (armhf, requires recent toolchain)
 # ARM hard-float ABI with VFP (armhf, requires recent toolchain)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 
 
+# ARM64 (requires x64 host)
+make CROSS=aarch64-linux-
+
 # PPC
 # PPC
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-# PPC/e500v2 (fast interpreter only)
-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
 
 
 # MIPS big-endian
 # MIPS big-endian
 make HOST_CC="gcc -m32" CROSS=mips-linux-
 make HOST_CC="gcc -m32" CROSS=mips-linux-
@@ -439,8 +442,7 @@ NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
 </pre>
 </pre>
 <p>
 <p>
-You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>.
-The environment variables need to match the iOS SDK version:
+You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
 </p>
 </p>
 <p style="font-size: 8pt;">
 <p style="font-size: 8pt;">
 Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
 Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
@@ -450,38 +452,84 @@ much slower than the JIT compiler. Please complain to Apple, not me.
 Or use Android. :-p
 Or use Android. :-p
 </p>
 </p>
 <pre class="code">
 <pre class="code">
-IXCODE=`xcode-select -print-path`
-ISDK=$IXCODE/Platforms/iPhoneOS.platform/Developer
-ISDKVER=iPhoneOS6.0.sdk
-ISDKP=$ISDK/usr/bin/
-ISDKF="-arch armv7 -isysroot $ISDK/SDKs/$ISDKVER"
-make HOST_CC="gcc -m32 -arch i386" CROSS=$ISDKP TARGET_FLAGS="$ISDKF" \
-     TARGET_SYS=iOS
+# iOS/ARM (32 bit)
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch armv7 -isysroot $ISDKP"
+make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \
+     TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+
+# iOS/ARM64
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch arm64 -isysroot $ISDKP"
+make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
 </pre>
 </pre>
+
+<h3 id="consoles">Cross-compiling for consoles</h3>
+<p>
+Building LuaJIT for consoles requires both a supported host compiler
+(x86 or x64) and a cross-compiler (to PPC, ARM or x64, depending on the
+console) from the official console SDK.
+</p>
+<p>
+Due to restrictions on consoles, the JIT compiler is disabled and only
+the fast interpreter is built. This is still faster than plain Lua,
+but much slower than the JIT compiler. The FFI is disabled, too, since
+it's not very useful in such an environment.
+</p>
+<p>
+The following commands build a static library <tt>libluajit.a</tt>,
+which can be linked against your game, just like the Lua library.
+</p>
 <p>
 <p>
-You can cross-compile for <b id="ps3">PS3</b> using the PS3&nbsp;SDK from
-a Linux host or a Windows host (requires 32 bit MinGW (GCC) on the host,
-too). Due to restrictions on consoles, the JIT compiler is disabled and
-only the fast interpreter is built:
+To cross-compile for <b id="ps3">PS3</b> from a Linux host (requires
+32&nbsp;bit GCC, i.e. multilib Linux/x64) or a Windows host (requires
+32&nbsp;bit MinGW), run this command:
 </p>
 </p>
 <pre class="code">
 <pre class="code">
 make HOST_CC="gcc -m32" CROSS=ppu-lv2-
 make HOST_CC="gcc -m32" CROSS=ppu-lv2-
 </pre>
 </pre>
 <p>
 <p>
-You can cross-compile for <b id="xbox360">Xbox 360</b> using the
-Xbox&nbsp;360 SDK (MSVC + XEDK). Due to restrictions on consoles, the
-JIT compiler is disabled and only the fast interpreter is built.
+To cross-compile for <b id="ps4">PS4</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and
+run the following commands:
 </p>
 </p>
+<pre class="code">
+cd src
+ps4build
+</pre>
 <p>
 <p>
-Open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
+To cross-compile for <b id="psvita">PS Vita</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and
+run the following commands:
+</p>
+<pre class="code">
+cd src
+psvitabuild
+</pre>
+<p>
+To cross-compile for <b id="xbox360">Xbox 360</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
 <tt>cd</tt> to the directory where you've unpacked the sources and run
 <tt>cd</tt> to the directory where you've unpacked the sources and run
-the following commands. This builds a static library <tt>luajit20.lib</tt>,
-which can be linked against your game, just like the Lua library.
+the following commands:
 </p>
 </p>
 <pre class="code">
 <pre class="code">
 cd src
 cd src
 xedkbuild
 xedkbuild
 </pre>
 </pre>
+<p>
+To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and run
+the following commands:
+</p>
+<pre class="code">
+cd src
+xb1build
+</pre>
 
 
 <h2 id="embed">Embedding LuaJIT</h2>
 <h2 id="embed">Embedding LuaJIT</h2>
 <p>
 <p>
@@ -565,9 +613,11 @@ for a regular distribution build:
 <ul>
 <ul>
 <li><tt>PREFIX</tt> overrides the installation path and should usually
 <li><tt>PREFIX</tt> overrides the installation path and should usually
 be set to <tt>/usr</tt>. Setting this also changes the module paths and
 be set to <tt>/usr</tt>. Setting this also changes the module paths and
-the <tt>-rpath</tt> of the shared library.</li>
+the paths needed to locate the shared library.</li>
 <li><tt>DESTDIR</tt> is an absolute path which allows you to install
 <li><tt>DESTDIR</tt> is an absolute path which allows you to install
 to a shadow tree instead of the root tree of the build system.</li>
 to a shadow tree instead of the root tree of the build system.</li>
+<li><tt>MULTILIB</tt> sets the architecture-specific library path component
+for multilib systems. The default is <tt>lib</tt>.</li>
 <li>Have a look at the top-level <tt>Makefile</tt> and <tt>src/Makefile</tt>
 <li>Have a look at the top-level <tt>Makefile</tt> and <tt>src/Makefile</tt>
 for additional variables to tweak. The following variables <em>may</em> be
 for additional variables to tweak. The following variables <em>may</em> be
 overridden, but it's <em>not</em> recommended, except for special needs
 overridden, but it's <em>not</em> recommended, except for special needs
@@ -603,7 +653,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 13 - 5
jni/LuaJIT-2.0.1/doc/luajit.html → jni/LuaJIT-2.1/doc/luajit.html

@@ -4,7 +4,7 @@
 <title>LuaJIT</title>
 <title>LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -38,6 +38,9 @@ table.os1 td {
 table.os2 td {
 table.os2 td {
   color: #ffa040;
   color: #ffa040;
 }
 }
+table.os3 td {
+  color: #40ffff;
+}
 table.compiler td {
 table.compiler td {
   color: #2080ff;
   color: #2080ff;
   background: #62bf41;
   background: #62bf41;
@@ -123,6 +126,8 @@ table.feature small {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -147,7 +152,7 @@ Lua is a powerful, dynamic and light-weight programming language.
 It may be embedded or used as a general-purpose, stand-alone language.
 It may be embedded or used as a general-purpose, stand-alone language.
 </p>
 </p>
 <p>
 <p>
-LuaJIT is Copyright &copy; 2005-2013 Mike Pall, released under the
+LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
 <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 </p>
 </p>
 <p>
 <p>
@@ -158,13 +163,16 @@ LuaJIT is Copyright &copy; 2005-2013 Mike Pall, released under the
 <tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr>
 <tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr>
 </table>
 </table>
 <table class="feature os os2">
 <table class="feature os os2">
-<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td><td>PS3</td><td>Xbox 360</td></tr>
+<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
+</table>
+<table class="feature os os3">
+<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
 </table>
 </table>
 <table class="feature compiler">
 <table class="feature compiler">
 <tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
 <tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
 </table>
 </table>
 <table class="feature cpu">
 <table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
 </table>
 </table>
 <table class="feature fcompat">
 <table class="feature fcompat">
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
@@ -218,7 +226,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 6 - 3
jni/LuaJIT-2.0.1/doc/running.html → jni/LuaJIT-2.1/doc/running.html

@@ -4,7 +4,7 @@
 <title>Running LuaJIT</title>
 <title>Running LuaJIT</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -63,6 +63,8 @@ td.param_default {
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a href="status.html">Status</a>
 <a href="status.html">Status</a>
@@ -178,6 +180,7 @@ Here are the available LuaJIT control commands:
 <li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
 <li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
 <li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
 <li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
 <li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
 <li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+<li id="j_p"><tt>-jp</tt> &mdash; Starts the <a href="ext_profiler.html">integrated profiler</a>.</li>
 </ul>
 </ul>
 <p>
 <p>
 The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
 The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
@@ -186,7 +189,7 @@ itself. For a description of their options and output format, please
 read the comment block at the start of their source.
 read the comment block at the start of their source.
 They can be found in the <tt>lib</tt> directory of the source
 They can be found in the <tt>lib</tt> directory of the source
 distribution or installed under the <tt>jit</tt> directory. By default
 distribution or installed under the <tt>jit</tt> directory. By default
-this is <tt>/usr/local/share/luajit-2.0.1/jit</tt> on POSIX
+this is <tt>/usr/local/share/luajit-2.0.4/jit</tt> on POSIX
 systems.
 systems.
 </p>
 </p>
 
 
@@ -296,7 +299,7 @@ Here are the parameters and their default settings:
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 6 - 13
jni/LuaJIT-2.0.1/doc/status.html → jni/LuaJIT-2.1/doc/status.html

@@ -1,10 +1,10 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
 <html>
 <head>
 <head>
-<title>Status &amp; Roadmap</title>
+<title>Status</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 <meta name="Author" content="Mike Pall">
 <meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2013, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
 <meta name="Language" content="en">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -17,7 +17,7 @@ ul li { padding-bottom: 0.3em; }
 <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
 <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
 </div>
 </div>
 <div id="head">
 <div id="head">
-<h1>Status &amp; Roadmap</h1>
+<h1>Status</h1>
 </div>
 </div>
 <div id="nav">
 <div id="nav">
 <ul><li>
 <ul><li>
@@ -44,6 +44,8 @@ ul li { padding-bottom: 0.3em; }
 <a href="ext_jit.html">jit.* Library</a>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li></ul>
 </li><li>
 </li><li>
 <a class="current" href="status.html">Status</a>
 <a class="current" href="status.html">Status</a>
@@ -102,20 +104,11 @@ handled correctly. The error may fall through an on-trace
 garbage collector.
 garbage collector.
 </li>
 </li>
 </ul>
 </ul>
-
-<h2>Roadmap</h2>
-<p>
-Please refer to the
-<a href="http://www.freelists.org/post/luajit/LuaJIT-Roadmap-20122013"><span class="ext">&raquo;</span>&nbsp;LuaJIT Roadmap 2012/2013</a> and an
-<a href="http://www.freelists.org/post/luajit/LuaJIT-Roadmap-20122013-UPDATE"><span class="ext">&raquo;</span>&nbsp;update on release planning</a> for details.
-</p>
-<p>
-</p>
 <br class="flush">
 <br class="flush">
 </div>
 </div>
 <div id="foot">
 <div id="foot">
 <hr class="hide">
 <hr class="hide">
-Copyright &copy; 2005-2013 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
 <span class="noprint">
 <span class="noprint">
 &middot;
 &middot;
 <a href="contact.html">Contact</a>
 <a href="contact.html">Contact</a>

+ 3 - 2
jni/LuaJIT-2.0.1/dynasm/dasm_arm.h → jni/LuaJIT-2.1/dynasm/dasm_arm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM ARM encoding engine.
 ** DynASM ARM encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -211,7 +211,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
-	if (n >= 0) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl += 10; n = *pl;
 	pl += 10; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;
 	goto linkrel;

+ 9 - 6
jni/LuaJIT-2.0.1/dynasm/dasm_arm.lua → jni/LuaJIT-2.1/dynasm/dasm_arm.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM ARM module.
 -- DynASM ARM module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -9,9 +9,9 @@
 local _info = {
 local _info = {
   arch =	"arm",
   arch =	"arm",
   description =	"DynASM ARM module",
   description =	"DynASM ARM module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -923,19 +923,22 @@ local function parse_template(params, template, nparams, pos)
 end
 end
 
 
 map_op[".template__"] = function(params, template, nparams)
 map_op[".template__"] = function(params, template, nparams)
-  if not params then return sub(template, 9) end
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
 
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- Limit number of section buffer positions used by a single dasm_put().
   -- A single opcode needs a maximum of 3 positions.
   -- A single opcode needs a maximum of 3 positions.
   if secpos+3 > maxsecpos then wflush() end
   if secpos+3 > maxsecpos then wflush() end
   local pos = wpos()
   local pos = wpos()
-  local apos, spos = #actargs, secpos
+  local lpos, apos, spos = #actlist, #actargs, secpos
 
 
   local ok, err
   local ok, err
   for t in gmatch(template, "[^|]+") do
   for t in gmatch(template, "[^|]+") do
     ok, err = pcall(parse_template, params, t, nparams, pos)
     ok, err = pcall(parse_template, params, t, nparams, pos)
     if ok then return end
     if ok then return end
     secpos = spos
     secpos = spos
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
     actargs[apos+1] = nil
     actargs[apos+1] = nil
     actargs[apos+2] = nil
     actargs[apos+2] = nil
     actargs[apos+3] = nil
     actargs[apos+3] = nil

+ 518 - 0
jni/LuaJIT-2.1/dynasm/dasm_arm64.h

@@ -0,0 +1,518 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;		/* Size of lglabels in bytes. */
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;		/* Size of pclabels in bytes. */
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state.
+** Requests a state block of DASM_PSZ(maxsection) bytes through DASM_M_GROW
+** (defined outside this file), stores it in Dst_REF and clears the label
+** arrays and all section buffers. Status, active section and action list
+** are not set here but in dasm_setup().
+*/
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state.
+** Hands every allocated section buffer, both label arrays and finally the
+** state structure itself to DASM_M_FREE together with their recorded sizes.
+*/
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup().
+** gl is the caller's array that dasm_encode() fills with the addresses of
+** global labels; maxgl is the number of global labels. The label table is
+** sized for 10 leading slots plus maxgl global slots.
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too.
+** maxpc is the requested number of PC labels. Only the part of the array
+** beyond the previous size is zeroed here.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;  /* Size in bytes before growing. */
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder.
+** Selects the action list for the following dasm_put() calls and resets the
+** status, the active section (section 0), all label tables and the position
+** and offset of every section.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  /* lglabels is still NULL if dasm_setupglobal() was never called. Passing a
+  ** null pointer to memset() is undefined, even with a length of zero, so
+  ** guard it the same way as pclabels. */
+  if (D->lglabels) memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS  /* Optional checks, only compiled in on request.
+** CK(x, st): if x is false, store DASM_S_<st> or'ed with the action list
+** offset of the current action in D->status and return from the caller.
+** CKPL(kind, st): same with DASM_S_RANGE_<st>, if the label pointer pl lies
+** outside the <kind>labels array. Both expect D, p and pl to be in scope. */
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)  /* Encode n as a 12 bit immediate.
+** Returns n itself if it fits into 12 bits, (n >> 12) with bit 12 set if
+** only bits 12..23 of n are used, and -1 if n cannot be encoded. */
+{
+  if ((n >> 12) == 0)
+    return n;
+  else if ((n & 0xff000fff) == 0)
+    return (n >> 12) | 0x1000;
+  else
+    return -1;
+}
+
+static int dasm_ffs(unsigned long long x)  /* Returns the zero-based index of
+** the HIGHEST set bit of x (despite the name), or -1 if x is zero. */
+{
+  int n = -1;
+  while (x) { x >>= 1; n++; }  /* Count the shifts needed to clear x. */
+  return n;
+}
+
+static int dasm_imm13(int lo, int hi)  /* Try to encode the 64 bit value
+** hi:lo as a replicated run of one bits. Returns the encoded field, which
+** dasm_encode() shifts left by 10 into the instruction, or -1 if the value
+** cannot be encoded. NOTE(review): the field presumably is the A64 logical
+** immediate N:immr:imms triple -- confirm against the architecture manual. */
+{
+  int inv = 0, w = 64, s = 0xfff, xa, xb;
+  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+  unsigned long long m = 1ULL, a, b, c;
+  if (n & 1) { n = ~n; inv = 1; }  /* Work on the complement if bit 0 is set. */
+  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);  /* Lowest set bits. */
+  xa = dasm_ffs(a); xb = dasm_ffs(b);
+  if (c) {  /* There is a second run of ones: the pattern must repeat. */
+    w = dasm_ffs(c) - xa;  /* Distance between the two runs = element size. */
+    if (w == 32) m = 0x0000000100000001UL;
+    else if (w == 16) m = 0x0001000100010001UL;
+    else if (w == 8) m = 0x0101010101010101UL;
+    else if (w == 4) m = 0x1111111111111111UL;
+    else if (w == 2) m = 0x5555555555555555UL;
+    else return -1;
+    s = (-2*w & 0x3f) - 1;
+  } else if (!a) {  /* n is zero (all zeros or, if inverted, all ones). */
+    return -1;
+  } else if (xb == -1) {  /* The run of ones extends up to bit 63. */
+    xb = 64;
+  }
+  if ((b-a) * m != n) return -1;  /* Not one run replicated w bits apart. */
+  if (inv) {
+    return ((w - xb) << 6) | (s+w+xa-xb);
+  } else {
+    return ((w - xa) << 6) | (s+xb-xa);
+  }
+  return -1;  /* Not reached: both branches above return. */
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
+** start is the offset of the first action in the action list. The variable
+** arguments supply one int for every action from DASM_REL_PC onwards
+** (DASM_IMM13X reads a second one). With DASM_CHECKS defined, a failed
+** check stores an error in D->status and returns early.
+** NOTE(review): such an early return from CK/CKPL skips va_end().
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {  /* Grow the section buffer before it runs full. */
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;  /* Passes 2 and 3 read this to find the actions again. */
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);  /* Action number is in the upper half. */
+    if (action >= DASM__MAX) {  /* Not an action: a plain instruction word. */
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;  /* Next word is a literal word. */
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);  /* Scale must divide n. */
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if ((ins & 0x8000))  /* Signed field. */
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMM6:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM12:
+	CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13W:
+	CK(dasm_imm13(n, n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13X: {
+	int m = va_arg(ap, int);  /* Second argument: upper 32 bits. */
+	CK(dasm_imm13(n, m) != -1, RANGE_I);
+	b[pos++] = n;
+	b[pos++] = m;
+	break;
+	}
+      case DASM_IMML: {
+#ifdef DASM_CHECKS
+	int scale = (p[-2] >> 30);  /* Top two bits of the preceding word. */
+	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+	   (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+	}
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets.
+** Stores the total size of all code sections in *szp and returns DASM_S_OK.
+** With DASM_CHECKS defined, *szp is set to 0 first and a pending error
+** status or DASM_S_UNDEF_PC (or'ed with the label number) is returned
+** instead if a PC label was referenced but never defined.
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    /* Global labels are numbered from 20 and stored at lglabels[number-10],
+    ** i.e. from slot 10 on, right after the ten local label slots. The scan
+    ** has to start at 10, otherwise the first ten globals are never marked.
+    */
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      /* Every dasm_put() stored its action list offset first. */
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	/* Pass 1 reserved the maximum padding; take back what is not needed. */
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	/* Turn the pass 1 estimate into the final offset. */
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+	case DASM_IMML: pos++; break;
+	case DASM_IMM13X: pos += 2; break;
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections.
+** Writes the machine code for all sections to buffer, which must provide at
+** least the size reported by dasm_link(). Addresses of global labels are
+** stored in the array given to dasm_setupglobal(). Returns DASM_S_OK,
+** DASM_S_PHASE if the emitted size differs from the linked size or, with
+** DASM_CHECKS defined, the status code of a failed check.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	/* Actions from DASM_ALIGN on own (at least) one buffer slot. */
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+	  goto patchrel;
+	case DASM_ALIGN:
+	  /* Pad with the A64 NOP (0xd503201f). The previous filler 0xe1a00000
+	  ** is the 32 bit ARM "mov r0, r0", which is not a NOP on ARM64. */
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+	patchrel:
+	  /* The branch instruction was already emitted: patch cp[-1]. */
+	  if (!(ins & 0xf800)) {  /* B, BL */
+	    CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+	    cp[-1] |= ((n >> 2) & 0x03ffffff);
+	  } else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
+	    CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0);
+	  } else if ((ins & 0x3000) == 0x2000) {  /* ADR */
+	    CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+	  } else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
+	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  /* Only global labels (numbers >= 20) are exported. */
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	case DASM_IMM6:
+	  cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+	  break;
+	case DASM_IMM12:
+	  cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+	  break;
+	case DASM_IMM13W:
+	  cp[-1] |= (dasm_imm13(n, n) << 10);
+	  break;
+	case DASM_IMM13X:
+	  cp[-1] |= (dasm_imm13(n, *b++) << 10);  /* Second slot: upper half. */
+	  break;
+	case DASM_IMML: {
+	  int scale = (p[-2] >> 30);
+	  /* Prefer the scaled unsigned form, else use the unscaled signed one. */
+	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+	  break;
+	  }
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset.
+** Returns the offset stored for a defined label, -1 if the label has been
+** referenced but is not defined yet, and -2 if it is unused or pc is
+** outside the PC label array.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);  /* Defined: -pos is its slot. */
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps.
+** Flags local labels 1..9 that still have pending references as
+** DASM_S_UNDEF_LG and resets the others. If secmatch is >= 0, the active
+** section must be section number secmatch, else DASM_S_MATCH_SEC is set.
+** Returns the resulting status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1166 - 0
jni/LuaJIT-2.1/dynasm/dasm_arm64.lua

@@ -0,0 +1,1166 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information. The arch name matches DASM_ARCH of the C encoding engine.
+local _info = {
+  arch =	"arm64",
+  description =	"DynASM ARM64 module",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers (hex and decimal) to the stream `out`.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array named `name` to `out`.
+-- An empty action list is written as a single STOP action.
+local function writeactions(out, name)
+  local nn = #actlist
+  -- The list is 1-based and slot nn is read back below, so the placeholder
+  -- for an empty list has to go into slot 1 (slot 0 is never written out).
+  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list. The word must be an integer in 0..0xffffffff.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too. `val` is the action's 16 bit operand, `a` the C expression passed to dasm_put(), `num` the number of buffer positions used (default 1).
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(w * 0x10000 + (val or 0)) -- Action number goes into the upper 16 bits.
+  if a then actargs[#actargs+1] = a end
+  if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow). Emits one dasm_put() line for all pending actions; a true `term` suppresses the terminating STOP.
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word. Small words are prefixed with an ESC action, since their upper 16 bits could otherwise be taken for an action number.
+local function wputw(n)
+  if n <= 0x000fffff then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word. Returns the index of the reserved action list slot.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = "" -- Placeholder, to be replaced by wputpos().
+  return pos
+end
+
+-- Store word to reserved position. A small word needs an ESC prefix: the ESC action then takes the reserved slot and the word is inserted right after it.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  if n <= 0x000fffff then
+    insert(actlist, pos+1, n) -- Shifts all later entries up by one.
+    n = map_action.ESC * 0x10000
+  end
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use. Numbers start at 20 and must not exceed 2047.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n -- Cache the number, so __index only runs once per name.
+  return n
+end})
+
+-- Dump global labels to the stream `out`, in order of their numbers (`lvl` is not used here).
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end -- Invert: number -> name.
+  out:write("Global labels:\n")
+  for i=20,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum to `out`. Every label name gets `prefix` prepended; a final <prefix>_MAX entry is appended.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end -- Invert: number -> name.
+  out:write("enum {\n")
+  for i=20,next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names to `out` as a C string array called `name`, terminated by a null pointer.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end -- Loop var shadows the parameter here only.
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use. Numbers start at 0; map_extern_ holds the reverse mapping.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n
+  map_extern_[n] = name
+  return n
+end})
+
+-- Dump extern labels to the stream `out`, in order of their numbers (`lvl` is not used here).
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0,next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names to `out` as a C string array called `name`, terminated by a null pointer.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0,next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+  return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+
+local map_extend = {
+  uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+  sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+}
+
+local map_cond = {
+  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+  hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+local parse_reg_type
+
+-- Parse a register operand. Accepts a register name, a type name with a
+-- default register or "type:reg". Returns the register number and the type
+-- entry (nil for plain registers). The register class letter is recorded in
+-- parse_reg_type and must be the same for all registers parsed until it is
+-- reset.
+local function parse_reg(expr)
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    -- Register 31 needs the "@" marker for x/w (it is only reachable via
+    -- xzr/wzr), but is an ordinary register for the other classes. The
+    -- pattern also lets 32..39 through, so the class test must be tied to
+    -- r == 31: "and" binds tighter than "or" in Lua.
+    if r <= 30 or (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
+      if not parse_reg_type then
+	parse_reg_type = rt
+      elseif parse_reg_type ~= rt then
+	werror("register size mismatch")
+      end
+      return r, tp
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_reg_base(expr) -- Parse a base register; returns its number shifted to bit 5 plus the type entry, if any.
+  if expr == "sp" then return 0x3e0 end -- Register number 31 at bit 5.
+  local base, tp = parse_reg(expr)
+  if parse_reg_type ~= "x" then werror("bad register type") end
+  parse_reg_type = false -- The base must not constrain the other registers.
+  return shl(base, 5), tp
+end
+
+local parse_ctx = {}
+
+local loadenv = setfenv and function(s) -- Compile chunk `s` with parse_ctx as its environment; this variant is used when setfenv exists.
+  local code = loadstring(s, "")
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s) -- Variant without setfenv: load() takes the environment directly.
+  return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases. Returns the number, or nil if `n` cannot be evaluated here.
+local function parse_number(n)
+  local x = tonumber(n)
+  if x then return x end
+  local code = loadenv("return "..n) -- Evaluate as an expression in parse_ctx.
+  if code then
+    local ok, y = pcall(code)
+    if ok then return y end
+  end
+  return nil
+end
+
+local function parse_imm(imm, bits, shift, scale, signed) -- Parse "#imm" for a field of `bits` bits at bit `shift`; the value is first divided by 2^scale. Returns the bits to add to the opcode.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then -- Constant known now: range-check and encode it here.
+    local m = sar(n, scale)
+    if shl(m, scale) == n then -- n must be a multiple of 2^scale.
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not evaluable here: emit an IMM action, the C encoder fills the field in.
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+local function parse_imm12(imm) -- Parse "#imm" as a 12 bit immediate with optional shift by 12. Returns the bits to add to the opcode.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    if shr(n, 12) == 0 then -- Fits into 12 bits: field starts at bit 10.
+      return shl(n, 10)
+    elseif band(n, 0xff000fff) == 0 then -- Only bits 12..23 used: shifted form.
+      return shr(n, 2) + 0x00400000
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not evaluable here: defer to the C encoder.
+    waction("IMM12", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm13(imm) -- Parse "#imm" as a repeating bit pattern immediate. Returns the bits to add to the opcode.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  local r64 = parse_reg_type == "x"
+  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then -- Only 32 bit constants are encoded here.
+    local inv = false
+    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end -- Make bit 0 zero.
+    local t = {}
+    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end -- Bits as "0"/"1", lowest first.
+    local b = table.concat(t)
+    b = b..(r64 and (inv and "1" or "0"):rep(32) or b) -- Extend the string to 64 bits.
+    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+    if p0 then
+      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
+      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then -- Power-of-two width that repeats.
+	local s = band(-2*w, 0x3f) - 1
+	if w == 64 then s = s + 0x1000 end
+	if inv then
+	  return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+	else
+	  return shl(w-#p0, 16) + shl(s+#p1, 10)
+	end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif r64 then -- Defer to the C encoder; 64 bit values are passed as two 32 bit halves.
+    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
+    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
+    return 0
+  else
+    waction("IMM13W", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm6(imm) -- Parse "#imm" in the range 0..63. Returns the bits to add to the opcode.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    if n >= 0 and n <= 63 then
+      return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) -- Low 5 bits at bit 19, bit 5 at bit 31.
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not evaluable here: defer to the C encoder.
+    waction("IMM6", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm_load(imm, scale) -- Encode a load/store offset; `imm` comes without the leading "#", `scale` is log2 of the access size.
+  local n = parse_number(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n and m >= 0 and m < 0x1000 then
+      return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
+    elseif n >= -256 and n < 256 then
+      return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not evaluable here: the C encoder picks one of the two forms.
+    waction("IMML", 0, imm)
+    return 0
+  end
+end
+
+local function parse_fpimm(imm) -- Parse "#imm" as a floating-point immediate; only constants known at this point are supported.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m, e = math.frexp(n)
+    local s, e2 = 0, band(e-2, 7)
+    if m < 0 then m = -m; s = 0x00100000 end -- Sign bit.
+    m = m*32-16 -- The mantissa must reduce to an integer in 0..15.
+    if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then -- The exponent must survive the 3 bit round trip.
+      return s + shl(e2, 17) + shl(m, 13)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    werror("NYI fpimm action")
+  end
+end
+
+local function parse_shift(expr) -- Parse "lsl|lsr|asr #amount": 6 bit amount at bit 10, shift kind at bit 22.
+  local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+  s = map_shift[s]
+  if not s then werror("expected shift operand") end
+  return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
+end
+
+local function parse_lslx16(expr) -- Parse "lsl #n" with a literal n of 0 or 16 (also 32 or 48 for x registers).
+  local n = match(expr, "^lsl%s*#(%d+)$")
+  n = tonumber(n)
+  if not n then werror("expected shift operand") end
+  if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then -- Only bit 4 (and bit 5 for x) may be set.
+    werror("bad shift amount")
+  end
+  return shl(n, 17)
+end
+
+local function parse_extend(expr) -- Parse "<extend> [#amount]": 3 bit amount at bit 10, extend kind at bit 13.
+  local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+  if s == "lsl" then -- lsl is an alias for uxtx (x registers) or uxtw.
+    s = parse_reg_type == "x" and 3 or 2
+  else
+    s = map_extend[s]
+  end
+  if not s then werror("expected extend operand") end
+  return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
+end
+
+local function parse_cond(expr, inv) -- Parse a condition name; the code is xor'ed with `inv` and placed at bit 12.
+  local c = map_cond[expr]
+  if not c then werror("expected condition operand") end
+  return shl(bit.bxor(c, inv), 12)
+end
+
+local function parse_load(params, nparams, n, op) -- Parse the address operand(s) starting at params[n] and return `op` with the addressing bits added.
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then -- Not in brackets: only a typed register with a field access is accepted.
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then
+	  waction("IMML", 0, format(tp.ctypefmt, tailr)) -- Offset comes from a C expression.
+	  return op + base
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  local scale = shr(op, 30) -- Top two bits of the opcode.
+  if p2 then -- "[base], #imm"
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+  elseif wb == "!" then -- "[base, #imm]!"
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if not p1a then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+  else -- "[base]", "[base, #imm]" or "[base, index ...]"
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + parse_reg_base(p1a)
+    if p2a ~= "" then
+      local imm = match(p2a, "^,%s*#(.*)$")
+      if imm then
+	op = op + parse_imm_load(imm, scale)
+      else -- Register index with optional extend/shift.
+	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+	  werror("bad index register type")
+	end
+	if p3b == "" then -- No extend given: needs an x register.
+	  if parse_reg_type ~= "x" then werror("bad index register type") end
+	  op = op + 0x6000
+	else
+	  if p3s == "" or p3s == "#0" then
+	  elseif p3s == "#"..scale then -- Index scaled by the access size.
+	    op = op + 0x1000
+	  else
+	    werror("bad scale")
+	  end
+	  if parse_reg_type == "x" then
+	    if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+	    elseif p3b == "sxtx" then op = op + 0xe000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  else
+	    if p3b == "uxtw" then op = op + 0x4000
+	    elseif p3b == "sxtw" then op = op + 0xc000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  end
+	end
+      end
+    else -- Plain "[base]": zero offset in the scaled unsigned form.
+      if wb == "!" then werror("bad use of '!'") end -- Not reached: "!" is handled by the elseif above.
+      op = op + 0x01000000
+    end
+  end
+  return op
+end
+
+local function parse_load_pair(params, nparams, n, op) -- Parse the address operand(s) of a pair load/store; returns the updated opcode.
+  if params[n+2] then werror("too many operands") end -- The address takes at most two operands.
+  local pn, p2 = params[n], params[n+1]
+  local scale = shr(op, 30) == 0 and 2 or 3 -- 2 when the top two opcode bits are zero, else 3.
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") -- p1: text inside [...], wb: "!" or "".
+  if not p1 then
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") -- Non-bracketed form: register followed by an expression tail.
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then -- Only taken when parse_reg_base also returns a type entry.
+	  waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) -- Offset is emitted as an action argument.
+	  return op + base + 0x01000000
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  if p2 then -- "[base], imm" form: a second operand follows the brackets.
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + 0x00800000
+  else
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end -- A missing offset defaults to "#0".
+    op = op + (wb == "!" and 0x01800000 or 0x01000000) -- "!" selects the writeback variant.
+  end
+  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
+end
+
+local function parse_label(label, def) -- Returns mode ("PC", "LG" or "EXT"), a value and (for "PC") an expression; def selects definition syntax.
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10) -- lnum is a string; the addition converts it to a number.
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'") -- Nothing matched for the requested kind.
+end
+
+local function branch_type(op) -- Classify a label-using opcode; the result is added to the REL_* action value by parse_template.
+  if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
+  elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
+	 band(op, 0x3b000000) == 0x18000000 then
+    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
+  elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
+  elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
+  elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP (band(const) presumably normalizes the constant to the bit library's signed result range; confirm band is bit.band)
+  else
+    assert(false, "unknown branch type") -- Templates using "B" must have one of the opcodes above.
+  end
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+local function op_alias(opname, f) -- Build an opcode handler that rewrites params with f, then assembles via map_op[opname].
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end -- No params: describe the alias target (name without its last two characters).
+    f(params, nparams) -- f modifies the params table in place.
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
+local function alias_bfx(p) -- Rewrite operand 4 as (operand 3)+(operand 4)-1; first character of each (presumably '#') is dropped.
+  p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
+end
+
+local function alias_bfiz(p) -- Rewrite operands 3 and 4 as expressions, using modulus 32 or 64 by register type.
+  parse_reg(p[1]) -- Called to set parse_reg_type from the first operand.
+  if parse_reg_type == "w" then
+    p[3] = "#-("..p[3]:sub(2)..")%32" -- Negated operand 3 modulo 32.
+    p[4] = "#("..p[4]:sub(2)..")-1" -- Operand 4 minus one.
+  else
+    p[3] = "#-("..p[3]:sub(2)..")%64" -- Negated operand 3 modulo 64.
+    p[4] = "#("..p[4]:sub(2)..")-1" -- Same rewrite as in the "w" branch.
+  end
+end
+
+local alias_lslimm = op_alias("ubfm_4", function(p) -- Immediate form of lsl, assembled through the ubfm_4 templates.
+  parse_reg(p[1]) -- Called to set parse_reg_type from the first operand.
+  local sh = p[3]:sub(2) -- Shift expression without its first character (presumably '#').
+  if parse_reg_type == "w" then
+    p[3] = "#-("..sh..")%32"
+    p[4] = "#31-("..sh..")"
+  else
+    p[3] = "#-("..sh..")%64"
+    p[4] = "#63-("..sh..")"
+  end
+end)
+
+-- Template strings for ARM64 instructions.
+map_op = {
+  -- Basic data processing instructions.
+  add_3  = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
+  add_4  = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
+  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
+  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
+  cmn_2  = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
+  cmn_3  = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
+
+  sub_3  = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
+  sub_4  = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
+  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
+  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
+  cmp_2  = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
+  cmp_3  = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
+
+  neg_2  = "4b0003e0DMg",
+  neg_3  = "4b0003e0DMSg",
+  negs_2 = "6b0003e0DMg",
+  negs_3 = "6b0003e0DMSg",
+
+  adc_3  = "1a000000DNMg",
+  adcs_3 = "3a000000DNMg",
+  sbc_3  = "5a000000DNMg",
+  sbcs_3 = "7a000000DNMg",
+  ngc_2  = "5a0003e0DMg",
+  ngcs_2 = "7a0003e0DMg",
+
+  and_3  = "0a000000DNMg|12000000pDNig",
+  and_4  = "0a000000DNMSg",
+  orr_3  = "2a000000DNMg|32000000pDNig",
+  orr_4  = "2a000000DNMSg",
+  eor_3  = "4a000000DNMg|52000000pDNig",
+  eor_4  = "4a000000DNMSg",
+  ands_3 = "6a000000DNMg|72000000DNig",
+  ands_4 = "6a000000DNMSg",
+  tst_2  = "6a00001fNMg|7200001fNig",
+  tst_3  = "6a00001fNMSg",
+
+  bic_3  = "0a200000DNMg",
+  bic_4  = "0a200000DNMSg",
+  orn_3  = "2a200000DNMg",
+  orn_4  = "2a200000DNMSg",
+  eon_3  = "4a200000DNMg",
+  eon_4  = "4a200000DNMSg",
+  bics_3 = "6a200000DNMg",
+  bics_4 = "6a200000DNMSg",
+
+  movn_2 = "12800000DWg",
+  movn_3 = "12800000DWRg",
+  movz_2 = "52800000DWg",
+  movz_3 = "52800000DWRg",
+  movk_2 = "72800000DWg",
+  movk_3 = "72800000DWRg",
+
+  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+  mov_2  = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+  mov_3  = "2a0003e0DMSg",
+  mvn_2  = "2a2003e0DMg",
+  mvn_3  = "2a2003e0DMSg",
+
+  adr_2  = "10000000DBx",
+  adrp_2 = "90000000DBx",
+
+  csel_4  = "1a800000DNMCg",
+  csinc_4 = "1a800400DNMCg",
+  csinv_4 = "5a800000DNMCg",
+  csneg_4 = "5a800400DNMCg",
+  cset_2  = "1a9f07e0Dcg",
+  csetm_2 = "5a9f03e0Dcg",
+  cinc_3  = "1a800400DNmcg",
+  cinv_3  = "5a800000DNmcg",
+  cneg_3  = "5a800400DNmcg",
+
+  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+
+  madd_4 = "1b000000DNMAg",
+  msub_4 = "1b008000DNMAg",
+  mul_3  = "1b007c00DNMg",
+  mneg_3 = "1b00fc00DNMg",
+
+  smaddl_4 = "9b200000DxNMwAx",
+  smsubl_4 = "9b208000DxNMwAx",
+  smull_3  = "9b207c00DxNMw",
+  smnegl_3 = "9b20fc00DxNMw",
+  smulh_3  = "9b407c00DNMx",
+  umaddl_4 = "9ba00000DxNMwAx",
+  umsubl_4 = "9ba08000DxNMwAx",
+  umull_3  = "9ba07c00DxNMw",
+  umnegl_3 = "9ba0fc00DxNMw",
+  umulh_3  = "9bc07c00DNMx",
+
+  udiv_3 = "1ac00800DNMg",
+  sdiv_3 = "1ac00c00DNMg",
+
+  -- Bit operations.
+  sbfm_4 = "13000000DN12w|93400000DN12x",
+  bfm_4  = "33000000DN12w|b3400000DN12x",
+  ubfm_4 = "53000000DN12w|d3400000DN12x",
+  extr_4 = "13800000DNM2w|93c00000DNM2x",
+
+  sxtb_2 = "13001c00DNw|93401c00DNx",
+  sxth_2 = "13003c00DNw|93403c00DNx",
+  sxtw_2 = "93407c00DxNw",
+  uxtb_2 = "53001c00DNw",
+  uxth_2 = "53003c00DNw",
+
+  sbfx_4  = op_alias("sbfm_4", alias_bfx),
+  bfxil_4 = op_alias("bfm_4", alias_bfx),
+  ubfx_4  = op_alias("ubfm_4", alias_bfx),
+  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+  bfi_4   = op_alias("bfm_4", alias_bfiz),
+  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+
+  lsl_3  = function(params, nparams)
+    if params and params[3]:byte() == 35 then
+      return alias_lslimm(params, nparams)
+    else
+      return op_template(params, "1ac02000DNMg", nparams)
+    end
+  end,
+  lsr_3  = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+  asr_3  = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+  ror_3  = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+
+  clz_2   = "5ac01000DNg",
+  cls_2   = "5ac01400DNg",
+  rbit_2  = "5ac00000DNg",
+  rev_2   = "5ac00800DNw|dac00c00DNx",
+  rev16_2 = "5ac00400DNg",
+  rev32_2 = "dac00800DNx",
+
+  -- Loads and stores.
+  ["strb_*"]  = "38000000DwL",
+  ["ldrb_*"]  = "38400000DwL",
+  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+  ["strh_*"]  = "78000000DwL",
+  ["ldrh_*"]  = "78400000DwL",
+  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+  -- NOTE: ldur etc. are handled by ldr et al.
+
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["ldpsw_*"] = "68400000DAxP",
+
+  -- Branches.
+  b_1    = "14000000B",
+  bl_1   = "94000000B",
+  blr_1  = "d63f0000Nx",
+  br_1   = "d61f0000Nx",
+  ret_0  = "d65f03c0",
+  ret_1  = "d65f0000Nx",
+  -- b.cond is added below.
+  cbz_2  = "34000000DBg",
+  cbnz_2 = "35000000DBg",
+  tbz_3  = "36000000DTBw|36000000DTBx",
+  tbnz_3 = "37000000DTBw|37000000DTBx",
+
+  -- Miscellaneous instructions.
+  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+  -- TODO: sys, sysl, ic, dc, at, tlbi
+  -- TODO: hint, yield, wfe, wfi, sev, sevl
+  -- TODO: clrex, dsb, dmb, isb
+  nop_0  = "d503201f",
+  brk_0  = "d4200000",
+  brk_1  = "d4200000W",
+
+  -- Floating point instructions.
+  fmov_2  = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+  fabs_2  = "1e20c000DNf",
+  fneg_2  = "1e214000DNf",
+  fsqrt_2 = "1e21c000DNf",
+
+  fcvt_2  = "1e22c000DdNs|1e624000DsNd",
+
+  -- TODO: half-precision and fixed-point conversions.
+  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+
+  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+
+  frintn_2 = "1e244000DNf",
+  frintp_2 = "1e24c000DNf",
+  frintm_2 = "1e254000DNf",
+  frintz_2 = "1e25c000DNf",
+  frinta_2 = "1e264000DNf",
+  frintx_2 = "1e274000DNf",
+  frinti_2 = "1e27c000DNf",
+
+  fadd_3   = "1e202800DNMf",
+  fsub_3   = "1e203800DNMf",
+  fmul_3   = "1e200800DNMf",
+  fnmul_3  = "1e208800DNMf",
+  fdiv_3   = "1e201800DNMf",
+
+  fmadd_4  = "1f000000DNMAf",
+  fmsub_4  = "1f008000DNMAf",
+  fnmadd_4 = "1f200000DNMAf",
+  fnmsub_4 = "1f208000DNMAf",
+
+  fmax_3   = "1e204800DNMf",
+  fmaxnm_3 = "1e206800DNMf",
+  fmin_3   = "1e205800DNMf",
+  fminnm_3 = "1e207800DNMf",
+
+  fcmp_2   = "1e202000NMf|1e202008NZf",
+  fcmpe_2  = "1e202010NMf|1e202018NZf",
+
+  fccmp_4  = "1e200400NMVCf",
+  fccmpe_4 = "1e200410NMVCf",
+
+  fcsel_4  = "1e200c00DNMCf",
+
+  -- TODO: crc32*, aes*, sha*, pmull
+  -- TODO: SIMD instructions.
+}
+
+for cond,c in pairs(map_cond) do -- Add one "b<cond>" entry per condition name.
+  map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" -- Condition code is added to the base opcode; "B" is the label operand.
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+local function parse_template(params, template, nparams, pos) -- Assemble params against a single template alternative and store the opcode at pos.
+  local op = tonumber(sub(template, 1, 8), 16) -- First 8 hex digits are the base opcode.
+  local n = 1 -- Index of the next operand in params.
+  local rtt = {} -- NOTE(review): not referenced anywhere else in this function.
+
+  parse_reg_type = false
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    local q = params[n]
+    if p == "D" then
+      op = op + parse_reg(q); n = n + 1 -- Register number at bit 0.
+    elseif p == "N" then
+      op = op + shl(parse_reg(q), 5); n = n + 1 -- Register number at bit 5.
+    elseif p == "M" then
+      op = op + shl(parse_reg(q), 16); n = n + 1 -- Register number at bit 16.
+    elseif p == "A" then
+      op = op + shl(parse_reg(q), 10); n = n + 1 -- Register number at bit 10.
+    elseif p == "m" then
+      op = op + shl(parse_reg(params[n-1]), 16) -- Reuses the previous operand at bit 16; n is not advanced.
+
+    elseif p == "p" then
+      if q == "sp" then params[n] = "@x31" end -- Rewrites "sp" for the following spec (handling of "@x31" is outside this chunk).
+    elseif p == "g" then
+      if parse_reg_type == "x" then -- x registers set bit 31; w registers leave the opcode unchanged.
+	op = op + 0x80000000
+      elseif parse_reg_type ~= "w" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "f" then
+      if parse_reg_type == "d" then -- d registers set bit 22; s registers leave the opcode unchanged.
+	op = op + 0x00400000
+      elseif parse_reg_type ~= "s" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "x" or p == "w" or p == "d" or p == "s" then -- Explicit type letter: must equal the current parse_reg_type.
+      if parse_reg_type ~= p then
+	werror("register size mismatch")
+      end
+      parse_reg_type = false
+
+    elseif p == "L" then
+      op = parse_load(params, nparams, n, op) -- Consumes the remaining operands; n is not advanced.
+    elseif p == "P" then
+      op = parse_load_pair(params, nparams, n, op) -- Consumes the remaining operands; n is not advanced.
+
+    elseif p == "B" then
+      local mode, v, s = parse_label(q, false); n = n + 1 -- Label reference (not a definition).
+      local m = branch_type(op) -- Value derived from the opcode accumulated so far.
+      waction("REL_"..mode, v+m, s, 1)
+
+    elseif p == "I" then
+      op = op + parse_imm12(q); n = n + 1
+    elseif p == "i" then
+      op = op + parse_imm13(q); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+    elseif p == "T" then
+      op = op + parse_imm6(q); n = n + 1
+    elseif p == "1" then
+      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+    elseif p == "2" then
+      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+    elseif p == "5" then
+      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+    elseif p == "V" then
+      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+    elseif p == "F" then
+      op = op + parse_fpimm(q); n = n + 1
+    elseif p == "Z" then
+      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end -- Operand must be a literal zero; it adds no opcode bits.
+      n = n + 1
+
+    elseif p == "S" then
+      op = op + parse_shift(q); n = n + 1
+    elseif p == "X" then
+      op = op + parse_extend(q); n = n + 1
+    elseif p == "R" then
+      op = op + parse_lslx16(q); n = n + 1
+    elseif p == "C" then
+      op = op + parse_cond(q, 0); n = n + 1 -- Condition as written.
+    elseif p == "c" then
+      op = op + parse_cond(q, 1); n = n + 1 -- Condition code XORed with 1.
+
+    else
+      assert(false) -- Unknown template character.
+    end
+  end
+  wputpos(pos, op) -- Store the finished opcode at the position passed in by the caller.
+end
+
+function op_template(params, template, nparams) -- Try each '|'-separated alternative until one assembles without error.
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end -- No params: return the template with 8-digit hex runs removed.
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions.
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+  local lpos, apos, spos = #actlist, #actargs, secpos -- Saved so a failed alternative can be rolled back.
+
+  local ok, err
+  for t in gmatch(template, "[^|]+") do
+    ok, err = pcall(parse_template, params, t, nparams, pos) -- Errors raised while parsing are caught here.
+    if ok then return end
+    secpos = spos -- Undo whatever the failed alternative emitted (up to 3 entries each).
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
+    actargs[apos+1] = nil
+    actargs[apos+2] = nil
+    actargs[apos+3] = nil
+  end
+  error(err, 0) -- No alternative matched: re-raise the last error without position info.
+end
+
+map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end -- No params: return the parameter description.
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end) -- Passes a function to wline; it calls writeactions when invoked.
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end -- No params: return the parameter description.
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end) -- Passes a function to wline; it calls writeglobals when invoked.
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end -- No params: return the parameter description.
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end) -- Passes a function to wline; it calls writeglobalnames when invoked.
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end -- No params: return the parameter description.
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end) -- Passes a function to wline; it calls writeexternnames when invoked.
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end -- No params: return the accepted label forms.
+  if secpos+1 > maxsecpos then wflush() end -- Flush first if one more position would exceed the limit.
+  local mode, n, s = parse_label(params[1], true) -- true: parse as a label definition.
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end -- No params: return the parameter description.
+  for _,p in ipairs(params) do
+    local n = tonumber(p) -- Only literals that tonumber() accepts are allowed.
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end -- Map negative values to their unsigned 32-bit equivalent.
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end -- Checked after each word is written.
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end -- No params: return the parameter description.
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do -- Halve up to 8 times; reaching exactly 1 means align is 2^i.
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment") -- Non-numeric, not a power of 2, or out of range.
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then -- No params: return the parameter description for the 2- or 3-operand form.
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3] -- reg is nil for the 2-operand form.
+  if not match(name, "^[%a_][%w_]*$") then -- Name must look like an identifier.
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num), -- Format string wrapping an expression in the Dt<num>() macro emitted below.
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) -- C macro: address of member expression _V on a null ctype pointer, cast to int.
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"] -- The 2-operand form shares the same handler.
+
+-- Dump type definitions.
+local function dumptypes(out, lvl) -- Write all entries of map_type to out, sorted by name; lvl is not used here.
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end -- Collect the names so they can be sorted.
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or "" -- Types defined without a register print an empty column.
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num) -- Emit a SECTION action for section num, then flush.
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out) -- Write a header line built from _info, then the action dump.
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl) -- Dump types, globals and externs, in that order.
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww) -- Store the four callbacks in wline/werror/wfatal/wwarn.
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush -- The flush function is handed back to the caller.
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt) -- Store the arguments in g_arch and g_opt.
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def) -- Chain the maps via __index fallbacks and return map_op, map_def.
+  setmetatable(map_op, { __index = map_coreop }) -- Lookups missing in map_op fall back to map_coreop.
+  setmetatable(map_def, { __index = map_archdef }) -- Lookups missing in map_def fall back to map_archdef.
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 3 - 2
jni/LuaJIT-2.0.1/dynasm/dasm_mips.h → jni/LuaJIT-2.1/dynasm/dasm_mips.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM MIPS encoding engine.
 ** DynASM MIPS encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -202,7 +202,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
-	if (n >= 0) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl += 10; n = *pl;
 	pl += 10; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;
 	goto linkrel;

+ 4 - 4
jni/LuaJIT-2.0.1/dynasm/dasm_mips.lua → jni/LuaJIT-2.1/dynasm/dasm_mips.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM MIPS module.
 -- DynASM MIPS module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -9,9 +9,9 @@
 local _info = {
 local _info = {
   arch =	"mips",
   arch =	"mips",
   description =	"DynASM MIPS module",
   description =	"DynASM MIPS module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2012-01-23",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }

+ 13 - 5
jni/LuaJIT-2.0.1/dynasm/dasm_ppc.h → jni/LuaJIT-2.1/dynasm/dasm_ppc.h

@@ -1,6 +1,6 @@
 /*
 /*
-** DynASM PPC encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** DynASM PPC/PPC64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
   DASM__MAX
   DASM__MAX
 };
 };
 
 
@@ -202,7 +202,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
-	if (n >= 0) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl += 10; n = *pl;
 	pl += 10; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;
 	goto linkrel;
@@ -243,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
 #endif
 #endif
 	b[pos++] = n;
 	b[pos++] = n;
 	break;
 	break;
+      case DASM_IMMSH:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
       }
       }
     }
     }
   }
   }
@@ -298,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMSH: pos++; break;
 	}
 	}
       }
       }
       stop: (void)0;
       stop: (void)0;
@@ -365,6 +370,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM:
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;
 	  break;
+	case DASM_IMMSH:
+	  cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
+	  break;
 	default: *cp++ = ins; break;
 	default: *cp++ = ins; break;
 	}
 	}
       }
       }

+ 687 - 17
jni/LuaJIT-2.0.1/dynasm/dasm_ppc.lua → jni/LuaJIT-2.1/dynasm/dasm_ppc.lua

@@ -1,17 +1,19 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Module information:
 -- Module information:
 local _info = {
 local _info = {
   arch =	"ppc",
   arch =	"ppc",
   description =	"DynASM PPC module",
   description =	"DynASM PPC module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMSH"
 }
 }
 
 
 -- Maximum number of section buffer positions for dasm_put().
 -- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local map_op, op_template
+
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
 -- Template strings for PPC instructions.
 -- Template strings for PPC instructions.
-local map_op = {
+map_op = {
   tdi_3 =	"08000000ARI",
   tdi_3 =	"08000000ARI",
   twi_3 =	"0c000000ARI",
   twi_3 =	"0c000000ARI",
   mulli_3 =	"1c000000RRI",
   mulli_3 =	"1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
   std_2 =	"f8000000RD",
   std_2 =	"f8000000RD",
   stdu_2 =	"f8000001RD",
   stdu_2 =	"f8000001RD",
 
 
+  subi_3 =	op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
+  subis_3 =	op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
+  subic_3 =	op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
+  ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
+
+  rotlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  rotrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
+  end),
+  rotlw_3 =	op_alias("rlwnm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  slwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[4] = "0"
+  end),
+  srwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
+  end),
+  clrlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "0"; p[5] = "31"
+  end),
+  clrrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
+  end),
+
+  -- Primary opcode 4:
+  mulhhwu_3 =		"10000010RRR.",
+  machhwu_3 =		"10000018RRR.",
+  mulhhw_3 =		"10000050RRR.",
+  nmachhw_3 =		"1000005cRRR.",
+  machhwsu_3 =		"10000098RRR.",
+  machhws_3 =		"100000d8RRR.",
+  nmachhws_3 =		"100000dcRRR.",
+  mulchwu_3 =		"10000110RRR.",
+  macchwu_3 =		"10000118RRR.",
+  mulchw_3 =		"10000150RRR.",
+  macchw_3 =		"10000158RRR.",
+  nmacchw_3 =		"1000015cRRR.",
+  macchwsu_3 =		"10000198RRR.",
+  macchws_3 =		"100001d8RRR.",
+  nmacchws_3 =		"100001dcRRR.",
+  mullhw_3 =		"10000350RRR.",
+  maclhw_3 =		"10000358RRR.",
+  nmaclhw_3 =		"1000035cRRR.",
+  maclhwsu_3 =		"10000398RRR.",
+  maclhws_3 =		"100003d8RRR.",
+  nmaclhws_3 =		"100003dcRRR.",
+  machhwuo_3 =		"10000418RRR.",
+  nmachhwo_3 =		"1000045cRRR.",
+  machhwsuo_3 =		"10000498RRR.",
+  machhwso_3 =		"100004d8RRR.",
+  nmachhwso_3 =		"100004dcRRR.",
+  macchwuo_3 =		"10000518RRR.",
+  macchwo_3 =		"10000558RRR.",
+  nmacchwo_3 =		"1000055cRRR.",
+  macchwsuo_3 =		"10000598RRR.",
+  macchwso_3 =		"100005d8RRR.",
+  nmacchwso_3 =		"100005dcRRR.",
+  maclhwo_3 =		"10000758RRR.",
+  nmaclhwo_3 =		"1000075cRRR.",
+  maclhwsuo_3 =		"10000798RRR.",
+  maclhwso_3 =		"100007d8RRR.",
+  nmaclhwso_3 =		"100007dcRRR.",
+
+  vaddubm_3 =		"10000000VVV",
+  vmaxub_3 =		"10000002VVV",
+  vrlb_3 =		"10000004VVV",
+  vcmpequb_3 =		"10000006VVV",
+  vmuloub_3 =		"10000008VVV",
+  vaddfp_3 =		"1000000aVVV",
+  vmrghb_3 =		"1000000cVVV",
+  vpkuhum_3 =		"1000000eVVV",
+  vmhaddshs_4 =		"10000020VVVV",
+  vmhraddshs_4 =	"10000021VVVV",
+  vmladduhm_4 =		"10000022VVVV",
+  vmsumubm_4 =		"10000024VVVV",
+  vmsummbm_4 =		"10000025VVVV",
+  vmsumuhm_4 =		"10000026VVVV",
+  vmsumuhs_4 =		"10000027VVVV",
+  vmsumshm_4 =		"10000028VVVV",
+  vmsumshs_4 =		"10000029VVVV",
+  vsel_4 =		"1000002aVVVV",
+  vperm_4 =		"1000002bVVVV",
+  vsldoi_4 =		"1000002cVVVP",
+  vpermxor_4 =		"1000002dVVVV",
+  vmaddfp_4 =		"1000002eVVVV~",
+  vnmsubfp_4 =		"1000002fVVVV~",
+  vaddeuqm_4 =		"1000003cVVVV",
+  vaddecuq_4 =		"1000003dVVVV",
+  vsubeuqm_4 =		"1000003eVVVV",
+  vsubecuq_4 =		"1000003fVVVV",
+  vadduhm_3 =		"10000040VVV",
+  vmaxuh_3 =		"10000042VVV",
+  vrlh_3 =		"10000044VVV",
+  vcmpequh_3 =		"10000046VVV",
+  vmulouh_3 =		"10000048VVV",
+  vsubfp_3 =		"1000004aVVV",
+  vmrghh_3 =		"1000004cVVV",
+  vpkuwum_3 =		"1000004eVVV",
+  vadduwm_3 =		"10000080VVV",
+  vmaxuw_3 =		"10000082VVV",
+  vrlw_3 =		"10000084VVV",
+  vcmpequw_3 =		"10000086VVV",
+  vmulouw_3 =		"10000088VVV",
+  vmuluwm_3 =		"10000089VVV",
+  vmrghw_3 =		"1000008cVVV",
+  vpkuhus_3 =		"1000008eVVV",
+  vaddudm_3 =		"100000c0VVV",
+  vmaxud_3 =		"100000c2VVV",
+  vrld_3 =		"100000c4VVV",
+  vcmpeqfp_3 =		"100000c6VVV",
+  vcmpequd_3 =		"100000c7VVV",
+  vpkuwus_3 =		"100000ceVVV",
+  vadduqm_3 =		"10000100VVV",
+  vmaxsb_3 =		"10000102VVV",
+  vslb_3 =		"10000104VVV",
+  vmulosb_3 =		"10000108VVV",
+  vrefp_2 =		"1000010aV-V",
+  vmrglb_3 =		"1000010cVVV",
+  vpkshus_3 =		"1000010eVVV",
+  vaddcuq_3 =		"10000140VVV",
+  vmaxsh_3 =		"10000142VVV",
+  vslh_3 =		"10000144VVV",
+  vmulosh_3 =		"10000148VVV",
+  vrsqrtefp_2 =		"1000014aV-V",
+  vmrglh_3 =		"1000014cVVV",
+  vpkswus_3 =		"1000014eVVV",
+  vaddcuw_3 =		"10000180VVV",
+  vmaxsw_3 =		"10000182VVV",
+  vslw_3 =		"10000184VVV",
+  vmulosw_3 =		"10000188VVV",
+  vexptefp_2 =		"1000018aV-V",
+  vmrglw_3 =		"1000018cVVV",
+  vpkshss_3 =		"1000018eVVV",
+  vmaxsd_3 =		"100001c2VVV",
+  vsl_3 =		"100001c4VVV",
+  vcmpgefp_3 =		"100001c6VVV",
+  vlogefp_2 =		"100001caV-V",
+  vpkswss_3 =		"100001ceVVV",
+  vadduhs_3 =		"10000240VVV",
+  vminuh_3 =		"10000242VVV",
+  vsrh_3 =		"10000244VVV",
+  vcmpgtuh_3 =		"10000246VVV",
+  vmuleuh_3 =		"10000248VVV",
+  vrfiz_2 =		"1000024aV-V",
+  vsplth_3 =		"1000024cVV3",
+  vupkhsh_2 =		"1000024eV-V",
+  vminuw_3 =		"10000282VVV",
+  vminud_3 =		"100002c2VVV",
+  vcmpgtud_3 =		"100002c7VVV",
+  vrfim_2 =		"100002caV-V",
+  vcmpgtsb_3 =		"10000306VVV",
+  vcfux_3 =		"1000030aVVA~",
+  vaddshs_3 =		"10000340VVV",
+  vminsh_3 =		"10000342VVV",
+  vsrah_3 =		"10000344VVV",
+  vcmpgtsh_3 =		"10000346VVV",
+  vmulesh_3 =		"10000348VVV",
+  vcfsx_3 =		"1000034aVVA~",
+  vspltish_2 =		"1000034cVS",
+  vupkhpx_2 =		"1000034eV-V",
+  vaddsws_3 =		"10000380VVV",
+  vminsw_3 =		"10000382VVV",
+  vsraw_3 =		"10000384VVV",
+  vcmpgtsw_3 =		"10000386VVV",
+  vmulesw_3 =		"10000388VVV",
+  vctuxs_3 =		"1000038aVVA~",
+  vspltisw_2 =		"1000038cVS",
+  vminsd_3 =		"100003c2VVV",
+  vsrad_3 =		"100003c4VVV",
+  vcmpbfp_3 =		"100003c6VVV",
+  vcmpgtsd_3 =		"100003c7VVV",
+  vctsxs_3 =		"100003caVVA~",
+  vupklpx_2 =		"100003ceV-V",
+  vsububm_3 =		"10000400VVV",
+  ["bcdadd._4"] =	"10000401VVVy.",
+  vavgub_3 =		"10000402VVV",
+  vand_3 =		"10000404VVV",
+  ["vcmpequb._3"] =	"10000406VVV",
+  vmaxfp_3 =		"1000040aVVV",
+  vsubuhm_3 =		"10000440VVV",
+  ["bcdsub._4"] =	"10000441VVVy.",
+  vavguh_3 =		"10000442VVV",
+  vandc_3 =		"10000444VVV",
+  ["vcmpequh._3"] =	"10000446VVV",
+  vminfp_3 =		"1000044aVVV",
+  vpkudum_3 =		"1000044eVVV",
+  vsubuwm_3 =		"10000480VVV",
+  vavguw_3 =		"10000482VVV",
+  vor_3 =		"10000484VVV",
+  ["vcmpequw._3"] =	"10000486VVV",
+  vpmsumw_3 =		"10000488VVV",
+  ["vcmpeqfp._3"] =	"100004c6VVV",
+  ["vcmpequd._3"] =	"100004c7VVV",
+  vpkudus_3 =		"100004ceVVV",
+  vavgsb_3 =		"10000502VVV",
+  vavgsh_3 =		"10000542VVV",
+  vorc_3 =		"10000544VVV",
+  vbpermq_3 =		"1000054cVVV",
+  vpksdus_3 =		"1000054eVVV",
+  vavgsw_3 =		"10000582VVV",
+  vsld_3 =		"100005c4VVV",
+  ["vcmpgefp._3"] =	"100005c6VVV",
+  vpksdss_3 =		"100005ceVVV",
+  vsububs_3 =		"10000600VVV",
+  mfvscr_1 =		"10000604V--",
+  vsum4ubs_3 =		"10000608VVV",
+  vsubuhs_3 =		"10000640VVV",
+  mtvscr_1 =		"10000644--V",
+  ["vcmpgtuh._3"] =	"10000646VVV",
+  vsum4shs_3 =		"10000648VVV",
+  vupkhsw_2 =		"1000064eV-V",
+  vsubuws_3 =		"10000680VVV",
+  vshasigmaw_4 =	"10000682VVYp",
+  veqv_3 =		"10000684VVV",
+  vsum2sws_3 =		"10000688VVV",
+  vmrgow_3 =		"1000068cVVV",
+  vshasigmad_4 =	"100006c2VVYp",
+  vsrd_3 =		"100006c4VVV",
+  ["vcmpgtud._3"] =	"100006c7VVV",
+  vupklsw_2 =		"100006ceV-V",
+  vupkslw_2 =		"100006ceV-V",
+  vsubsbs_3 =		"10000700VVV",
+  vclzb_2 =		"10000702V-V",
+  vpopcntb_2 =		"10000703V-V",
+  ["vcmpgtsb._3"] =	"10000706VVV",
+  vsum4sbs_3 =		"10000708VVV",
+  vsubshs_3 =		"10000740VVV",
+  vclzh_2 =		"10000742V-V",
+  vpopcnth_2 =		"10000743V-V",
+  ["vcmpgtsh._3"] =	"10000746VVV",
+  vsubsws_3 =		"10000780VVV",
+  vclzw_2 =		"10000782V-V",
+  vpopcntw_2 =		"10000783V-V",
+  ["vcmpgtsw._3"] =	"10000786VVV",
+  vsumsws_3 =		"10000788VVV",
+  vmrgew_3 =		"1000078cVVV",
+  vclzd_2 =		"100007c2V-V",
+  vpopcntd_2 =		"100007c3V-V",
+  ["vcmpbfp._3"] =	"100007c6VVV",
+  ["vcmpgtsd._3"] =	"100007c7VVV",
+
   -- Primary opcode 19:
   -- Primary opcode 19:
   mcrf_2 =	"4c000000XX",
   mcrf_2 =	"4c000000XX",
   isync_0 =	"4c00012c",
   isync_0 =	"4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
   bclrl_2 =	"4c000021AA",
   bclrl_2 =	"4c000021AA",
   bcctr_2 =	"4c000420AA",
   bcctr_2 =	"4c000420AA",
   bcctrl_2 =	"4c000421AA",
   bcctrl_2 =	"4c000421AA",
+  bctar_2 =	"4c000460AA",
+  bctarl_2 =	"4c000461AA",
   blr_0 =	"4e800020",
   blr_0 =	"4e800020",
   blrl_0 =	"4e800021",
   blrl_0 =	"4e800021",
   bctr_0 =	"4e800420",
   bctr_0 =	"4e800420",
@@ -327,6 +585,7 @@ local map_op = {
   cmpd_3 =	"7c200000XRR",
   cmpd_3 =	"7c200000XRR",
   cmpd_2 =	"7c200000-RR",
   cmpd_2 =	"7c200000-RR",
   tw_3 =	"7c000008ARR",
   tw_3 =	"7c000008ARR",
+  lvsl_3 =	"7c00000cVRR",
   subfc_3 =	"7c000010RRR.",
   subfc_3 =	"7c000010RRR.",
   subc_3 =	"7c000010RRR~.",
   subc_3 =	"7c000010RRR~.",
   mulhdu_3 =	"7c000012RRR.",
   mulhdu_3 =	"7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
   cmplw_2 =	"7c000040-RR",
   cmplw_2 =	"7c000040-RR",
   cmpld_3 =	"7c200040XRR",
   cmpld_3 =	"7c200040XRR",
   cmpld_2 =	"7c200040-RR",
   cmpld_2 =	"7c200040-RR",
+  lvsr_3 =	"7c00004cVRR",
   subf_3 =	"7c000050RRR.",
   subf_3 =	"7c000050RRR.",
   sub_3 =	"7c000050RRR~.",
   sub_3 =	"7c000050RRR~.",
+  lbarx_3 =	"7c000068RR0R",
   ldux_3 =	"7c00006aRR0R",
   ldux_3 =	"7c00006aRR0R",
   dcbst_2 =	"7c00006c-RR",
   dcbst_2 =	"7c00006c-RR",
   lwzux_3 =	"7c00006eRR0R",
   lwzux_3 =	"7c00006eRR0R",
   cntlzd_2 =	"7c000074RR~",
   cntlzd_2 =	"7c000074RR~",
   andc_3 =	"7c000078RR~R.",
   andc_3 =	"7c000078RR~R.",
   td_3 =	"7c000088ARR",
   td_3 =	"7c000088ARR",
+  lvewx_3 =	"7c00008eVRR",
   mulhd_3 =	"7c000092RRR.",
   mulhd_3 =	"7c000092RRR.",
+  addg6s_3 =	"7c000094RRR",
   mulhw_3 =	"7c000096RRR.",
   mulhw_3 =	"7c000096RRR.",
+  dlmzb_3 =	"7c00009cRR~R.",
   ldarx_3 =	"7c0000a8RR0R",
   ldarx_3 =	"7c0000a8RR0R",
   dcbf_2 =	"7c0000ac-RR",
   dcbf_2 =	"7c0000ac-RR",
   lbzx_3 =	"7c0000aeRR0R",
   lbzx_3 =	"7c0000aeRR0R",
+  lvx_3 =	"7c0000ceVRR",
   neg_2 =	"7c0000d0RR.",
   neg_2 =	"7c0000d0RR.",
+  lharx_3 =	"7c0000e8RR0R",
   lbzux_3 =	"7c0000eeRR0R",
   lbzux_3 =	"7c0000eeRR0R",
   popcntb_2 =	"7c0000f4RR~",
   popcntb_2 =	"7c0000f4RR~",
   not_2 =	"7c0000f8RR~%.",
   not_2 =	"7c0000f8RR~%.",
   nor_3 =	"7c0000f8RR~R.",
   nor_3 =	"7c0000f8RR~R.",
+  stvebx_3 =	"7c00010eVRR",
   subfe_3 =	"7c000110RRR.",
   subfe_3 =	"7c000110RRR.",
   sube_3 =	"7c000110RRR~.",
   sube_3 =	"7c000110RRR~.",
   adde_3 =	"7c000114RRR.",
   adde_3 =	"7c000114RRR.",
   stdx_3 =	"7c00012aRR0R",
   stdx_3 =	"7c00012aRR0R",
-  stwcx_3 =	"7c00012cRR0R.",
+  ["stwcx._3"] =	"7c00012dRR0R.",
   stwx_3 =	"7c00012eRR0R",
   stwx_3 =	"7c00012eRR0R",
   prtyw_2 =	"7c000134RR~",
   prtyw_2 =	"7c000134RR~",
+  stvehx_3 =	"7c00014eVRR",
   stdux_3 =	"7c00016aRR0R",
   stdux_3 =	"7c00016aRR0R",
+  ["stqcx._3"] =	"7c00016dR:R0R.",
   stwux_3 =	"7c00016eRR0R",
   stwux_3 =	"7c00016eRR0R",
   prtyd_2 =	"7c000174RR~",
   prtyd_2 =	"7c000174RR~",
+  stvewx_3 =	"7c00018eVRR",
   subfze_2 =	"7c000190RR.",
   subfze_2 =	"7c000190RR.",
   addze_2 =	"7c000194RR.",
   addze_2 =	"7c000194RR.",
-  stdcx_3 =	"7c0001acRR0R.",
+  ["stdcx._3"] =	"7c0001adRR0R.",
   stbx_3 =	"7c0001aeRR0R",
   stbx_3 =	"7c0001aeRR0R",
+  stvx_3 =	"7c0001ceVRR",
   subfme_2 =	"7c0001d0RR.",
   subfme_2 =	"7c0001d0RR.",
   mulld_3 =	"7c0001d2RRR.",
   mulld_3 =	"7c0001d2RRR.",
   addme_2 =	"7c0001d4RR.",
   addme_2 =	"7c0001d4RR.",
   mullw_3 =	"7c0001d6RRR.",
   mullw_3 =	"7c0001d6RRR.",
   dcbtst_2 =	"7c0001ec-RR",
   dcbtst_2 =	"7c0001ec-RR",
   stbux_3 =	"7c0001eeRR0R",
   stbux_3 =	"7c0001eeRR0R",
+  bpermd_3 =	"7c0001f8RR~R",
+  lvepxl_3 =	"7c00020eVRR",
   add_3 =	"7c000214RRR.",
   add_3 =	"7c000214RRR.",
+  lqarx_3 =	"7c000228R:R0R",
   dcbt_2 =	"7c00022c-RR",
   dcbt_2 =	"7c00022c-RR",
   lhzx_3 =	"7c00022eRR0R",
   lhzx_3 =	"7c00022eRR0R",
+  cdtbcd_2 =	"7c000234RR~",
   eqv_3 =	"7c000238RR~R.",
   eqv_3 =	"7c000238RR~R.",
+  lvepx_3 =	"7c00024eVRR",
   eciwx_3 =	"7c00026cRR0R",
   eciwx_3 =	"7c00026cRR0R",
   lhzux_3 =	"7c00026eRR0R",
   lhzux_3 =	"7c00026eRR0R",
+  cbcdtd_2 =	"7c000274RR~",
   xor_3 =	"7c000278RR~R.",
   xor_3 =	"7c000278RR~R.",
   mfspefscr_1 =	"7c0082a6R",
   mfspefscr_1 =	"7c0082a6R",
   mfxer_1 =	"7c0102a6R",
   mfxer_1 =	"7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
   lhax_3 =	"7c0002aeRR0R",
   lhax_3 =	"7c0002aeRR0R",
   mftb_1 =	"7c0c42e6R",
   mftb_1 =	"7c0c42e6R",
   mftbu_1 =	"7c0d42e6R",
   mftbu_1 =	"7c0d42e6R",
+  lvxl_3 =	"7c0002ceVRR",
   lwaux_3 =	"7c0002eaRR0R",
   lwaux_3 =	"7c0002eaRR0R",
   lhaux_3 =	"7c0002eeRR0R",
   lhaux_3 =	"7c0002eeRR0R",
+  popcntw_2 =	"7c0002f4RR~",
+  divdeu_3 =	"7c000312RRR.",
+  divweu_3 =	"7c000316RRR.",
   sthx_3 =	"7c00032eRR0R",
   sthx_3 =	"7c00032eRR0R",
   orc_3 =	"7c000338RR~R.",
   orc_3 =	"7c000338RR~R.",
   ecowx_3 =	"7c00036cRR0R",
   ecowx_3 =	"7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
   mtctr_1 =	"7c0903a6R",
   mtctr_1 =	"7c0903a6R",
   dcbi_2 =	"7c0003ac-RR",
   dcbi_2 =	"7c0003ac-RR",
   nand_3 =	"7c0003b8RR~R.",
   nand_3 =	"7c0003b8RR~R.",
+  dsn_2 =	"7c0003c6-RR",
+  stvxl_3 =	"7c0003ceVRR",
   divd_3 =	"7c0003d2RRR.",
   divd_3 =	"7c0003d2RRR.",
   divw_3 =	"7c0003d6RRR.",
   divw_3 =	"7c0003d6RRR.",
+  popcntd_2 =	"7c0003f4RR~",
   cmpb_3 =	"7c0003f8RR~R.",
   cmpb_3 =	"7c0003f8RR~R.",
   mcrxr_1 =	"7c000400X",
   mcrxr_1 =	"7c000400X",
+  lbdx_3 =	"7c000406RRR",
   subfco_3 =	"7c000410RRR.",
   subfco_3 =	"7c000410RRR.",
   subco_3 =	"7c000410RRR~.",
   subco_3 =	"7c000410RRR~.",
   addco_3 =	"7c000414RRR.",
   addco_3 =	"7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
   lfsx_3 =	"7c00042eFR0R",
   lfsx_3 =	"7c00042eFR0R",
   srw_3 =	"7c000430RR~R.",
   srw_3 =	"7c000430RR~R.",
   srd_3 =	"7c000436RR~R.",
   srd_3 =	"7c000436RR~R.",
+  lhdx_3 =	"7c000446RRR",
   subfo_3 =	"7c000450RRR.",
   subfo_3 =	"7c000450RRR.",
   subo_3 =	"7c000450RRR~.",
   subo_3 =	"7c000450RRR~.",
   lfsux_3 =	"7c00046eFR0R",
   lfsux_3 =	"7c00046eFR0R",
+  lwdx_3 =	"7c000486RRR",
   lswi_3 =	"7c0004aaRR0A",
   lswi_3 =	"7c0004aaRR0A",
   sync_0 =	"7c0004ac",
   sync_0 =	"7c0004ac",
   lwsync_0 =	"7c2004ac",
   lwsync_0 =	"7c2004ac",
   ptesync_0 =	"7c4004ac",
   ptesync_0 =	"7c4004ac",
   lfdx_3 =	"7c0004aeFR0R",
   lfdx_3 =	"7c0004aeFR0R",
+  lddx_3 =	"7c0004c6RRR",
   nego_2 =	"7c0004d0RR.",
   nego_2 =	"7c0004d0RR.",
   lfdux_3 =	"7c0004eeFR0R",
   lfdux_3 =	"7c0004eeFR0R",
+  stbdx_3 =	"7c000506RRR",
   subfeo_3 =	"7c000510RRR.",
   subfeo_3 =	"7c000510RRR.",
   subeo_3 =	"7c000510RRR~.",
   subeo_3 =	"7c000510RRR~.",
   addeo_3 =	"7c000514RRR.",
   addeo_3 =	"7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
   stswx_3 =	"7c00052aRR0R",
   stswx_3 =	"7c00052aRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stfsx_3 =	"7c00052eFR0R",
   stfsx_3 =	"7c00052eFR0R",
+  sthdx_3 =	"7c000546RRR",
+  ["stbcx._3"] =	"7c00056dRRR",
   stfsux_3 =	"7c00056eFR0R",
   stfsux_3 =	"7c00056eFR0R",
+  stwdx_3 =	"7c000586RRR",
   subfzeo_2 =	"7c000590RR.",
   subfzeo_2 =	"7c000590RR.",
   addzeo_2 =	"7c000594RR.",
   addzeo_2 =	"7c000594RR.",
   stswi_3 =	"7c0005aaRR0A",
   stswi_3 =	"7c0005aaRR0A",
+  ["sthcx._3"] =	"7c0005adRRR",
   stfdx_3 =	"7c0005aeFR0R",
   stfdx_3 =	"7c0005aeFR0R",
+  stddx_3 =	"7c0005c6RRR",
   subfmeo_2 =	"7c0005d0RR.",
   subfmeo_2 =	"7c0005d0RR.",
   mulldo_3 =	"7c0005d2RRR.",
   mulldo_3 =	"7c0005d2RRR.",
   addmeo_2 =	"7c0005d4RR.",
   addmeo_2 =	"7c0005d4RR.",
   mullwo_3 =	"7c0005d6RRR.",
   mullwo_3 =	"7c0005d6RRR.",
   dcba_2 =	"7c0005ec-RR",
   dcba_2 =	"7c0005ec-RR",
   stfdux_3 =	"7c0005eeFR0R",
   stfdux_3 =	"7c0005eeFR0R",
+  stvepxl_3 =	"7c00060eVRR",
   addo_3 =	"7c000614RRR.",
   addo_3 =	"7c000614RRR.",
   lhbrx_3 =	"7c00062cRR0R",
   lhbrx_3 =	"7c00062cRR0R",
+  lfdpx_3 =	"7c00062eF:RR",
   sraw_3 =	"7c000630RR~R.",
   sraw_3 =	"7c000630RR~R.",
   srad_3 =	"7c000634RR~R.",
   srad_3 =	"7c000634RR~R.",
+  lfddx_3 =	"7c000646FRR",
+  stvepx_3 =	"7c00064eVRR",
   srawi_3 =	"7c000670RR~A.",
   srawi_3 =	"7c000670RR~A.",
   sradi_3 =	"7c000674RR~H.",
   sradi_3 =	"7c000674RR~H.",
   eieio_0 =	"7c0006ac",
   eieio_0 =	"7c0006ac",
   lfiwax_3 =	"7c0006aeFR0R",
   lfiwax_3 =	"7c0006aeFR0R",
+  divdeuo_3 =	"7c000712RRR.",
+  divweuo_3 =	"7c000716RRR.",
   sthbrx_3 =	"7c00072cRR0R",
   sthbrx_3 =	"7c00072cRR0R",
+  stfdpx_3 =	"7c00072eF:RR",
   extsh_2 =	"7c000734RR~.",
   extsh_2 =	"7c000734RR~.",
+  stfddx_3 =	"7c000746FRR",
+  divdeo_3 =	"7c000752RRR.",
+  divweo_3 =	"7c000756RRR.",
   extsb_2 =	"7c000774RR~.",
   extsb_2 =	"7c000774RR~.",
   divduo_3 =	"7c000792RRR.",
   divduo_3 =	"7c000792RRR.",
   divwou_3 =	"7c000796RRR.",
   divwou_3 =	"7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
   divwo_3 =	"7c0007d6RRR.",
   divwo_3 =	"7c0007d6RRR.",
   dcbz_2 =	"7c0007ec-RR",
   dcbz_2 =	"7c0007ec-RR",
 
 
+  ["tbegin._1"] =	"7c00051d1",
+  ["tbegin._0"] =	"7c00051d",
+  ["tend._1"] =		"7c00055dY",
+  ["tend._0"] =		"7c00055d",
+  ["tendall._0"] =	"7e00055d",
+  tcheck_1 =		"7c00059cX",
+  ["tsr._1"] =		"7c0005dd1",
+  ["tsuspend._0"] =	"7c0005dd",
+  ["tresume._0"] =	"7c2005dd",
+  ["tabortwc._3"] =	"7c00061dARR",
+  ["tabortdc._3"] =	"7c00065dARR",
+  ["tabortwci._3"] =	"7c00069dARS",
+  ["tabortdci._3"] =	"7c0006ddARS",
+  ["tabort._1"] =	"7c00071d-R-",
+  ["treclaim._1"] =	"7c00075d-R",
+  ["trechkpt._0"] =	"7c0007dd",
+
+  lxsiwzx_3 =	"7c000018QRR",
+  lxsiwax_3 =	"7c000098QRR",
+  mfvsrd_2 =	"7c000066-Rq",
+  mfvsrwz_2 =	"7c0000e6-Rq",
+  stxsiwx_3 =	"7c000118QRR",
+  mtvsrd_2 =	"7c000166QR",
+  mtvsrwa_2 =	"7c0001a6QR",
+  lxvdsx_3 =	"7c000298QRR",
+  lxsspx_3 =	"7c000418QRR",
+  lxsdx_3 =	"7c000498QRR",
+  stxsspx_3 =	"7c000518QRR",
+  stxsdx_3 =	"7c000598QRR",
+  lxvw4x_3 =	"7c000618QRR",
+  lxvd2x_3 =	"7c000698QRR",
+  stxvw4x_3 =	"7c000718QRR",
+  stxvd2x_3 =	"7c000798QRR",
+
   -- Primary opcode 30:
   -- Primary opcode 30:
   rldicl_4 =	"78000000RR~HM.",
   rldicl_4 =	"78000000RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
   rldcl_4 =	"78000010RR~RM.",
   rldcl_4 =	"78000010RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
 
 
+  rotldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = "0"
+  end),
+  rotrdi_3 =	op_alias("rldicl_4", function(p)
+    p[3] = "64-("..p[3]..")"; p[4] = "0"
+  end),
+  rotld_3 =	op_alias("rldcl_4", function(p)
+    p[4] = "0"
+  end),
+  sldi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"
+  end),
+  srdi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "64-("..p[3]..")"
+  end),
+  clrldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "0"
+  end),
+  clrrdi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"; p[3] = "0"
+  end),
+
+  -- Primary opcode 56:
+  lq_2 =	"e0000000R:D", -- NYI: displacement must be divisible by 8.
+
+  -- Primary opcode 57:
+  lfdp_2 =	"e4000000F:D", -- NYI: displacement must be divisible by 4.
+
   -- Primary opcode 59:
   -- Primary opcode 59:
   fdivs_3 =	"ec000024FFF.",
   fdivs_3 =	"ec000024FFF.",
   fsubs_3 =	"ec000028FFF.",
   fsubs_3 =	"ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
   fmadds_4 =	"ec00003aFFFF~.",
   fmadds_4 =	"ec00003aFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
+  fcfids_2 =	"ec00069cF-F.",
+  fcfidus_2 =	"ec00079cF-F.",
+
+  dadd_3 =	"ec000004FFF.",
+  dqua_4 =	"ec000006FFFZ.",
+  dmul_3 =	"ec000044FFF.",
+  drrnd_4 =	"ec000046FFFZ.",
+  dscli_3 =	"ec000084FF6.",
+  dquai_4 =	"ec000086SF~FZ.",
+  dscri_3 =	"ec0000c4FF6.",
+  drintx_4 =	"ec0000c61F~FZ.",
+  dcmpo_3 =	"ec000104XFF",
+  dtstex_3 =	"ec000144XFF",
+  dtstdc_3 =	"ec000184XF6",
+  dtstdg_3 =	"ec0001c4XF6",
+  drintn_4 =	"ec0001c61F~FZ.",
+  dctdp_2 =	"ec000204F-F.",
+  dctfix_2 =	"ec000244F-F.",
+  ddedpd_3 =	"ec000284ZF~F.",
+  dxex_2 =	"ec0002c4F-F.",
+  dsub_3 =	"ec000404FFF.",
+  ddiv_3 =	"ec000444FFF.",
+  dcmpu_3 =	"ec000504XFF",
+  dtstsf_3 =	"ec000544XFF",
+  drsp_2 =	"ec000604F-F.",
+  dcffix_2 =	"ec000644F-F.",
+  denbcd_3 =	"ec000684YF~F.",
+  diex_3 =	"ec0006c4FFF.",
+
+  -- Primary opcode 60:
+  xsaddsp_3 =		"f0000000QQQ",
+  xsmaddasp_3 =		"f0000008QQQ",
+  xxsldwi_4 =		"f0000010QQQz",
+  xsrsqrtesp_2 =	"f0000028Q-Q",
+  xssqrtsp_2 =		"f000002cQ-Q",
+  xxsel_4 =		"f0000030QQQQ",
+  xssubsp_3 =		"f0000040QQQ",
+  xsmaddmsp_3 =		"f0000048QQQ",
+  xxpermdi_4 =		"f0000050QQQz",
+  xsresp_2 =		"f0000068Q-Q",
+  xsmulsp_3 =		"f0000080QQQ",
+  xsmsubasp_3 =		"f0000088QQQ",
+  xxmrghw_3 =		"f0000090QQQ",
+  xsdivsp_3 =		"f00000c0QQQ",
+  xsmsubmsp_3 =		"f00000c8QQQ",
+  xsadddp_3 =		"f0000100QQQ",
+  xsmaddadp_3 =		"f0000108QQQ",
+  xscmpudp_3 =		"f0000118XQQ",
+  xscvdpuxws_2 =	"f0000120Q-Q",
+  xsrdpi_2 =		"f0000124Q-Q",
+  xsrsqrtedp_2 =	"f0000128Q-Q",
+  xssqrtdp_2 =		"f000012cQ-Q",
+  xssubdp_3 =		"f0000140QQQ",
+  xsmaddmdp_3 =		"f0000148QQQ",
+  xscmpodp_3 =		"f0000158XQQ",
+  xscvdpsxws_2 =	"f0000160Q-Q",
+  xsrdpiz_2 =		"f0000164Q-Q",
+  xsredp_2 =		"f0000168Q-Q",
+  xsmuldp_3 =		"f0000180QQQ",
+  xsmsubadp_3 =		"f0000188QQQ",
+  xxmrglw_3 =		"f0000190QQQ",
+  xsrdpip_2 =		"f00001a4Q-Q",
+  xstsqrtdp_2 =		"f00001a8X-Q",
+  xsrdpic_2 =		"f00001acQ-Q",
+  xsdivdp_3 =		"f00001c0QQQ",
+  xsmsubmdp_3 =		"f00001c8QQQ",
+  xsrdpim_2 =		"f00001e4Q-Q",
+  xstdivdp_3 =		"f00001e8XQQ",
+  xvaddsp_3 =		"f0000200QQQ",
+  xvmaddasp_3 =		"f0000208QQQ",
+  xvcmpeqsp_3 =		"f0000218QQQ",
+  xvcvspuxws_2 =	"f0000220Q-Q",
+  xvrspi_2 =		"f0000224Q-Q",
+  xvrsqrtesp_2 =	"f0000228Q-Q",
+  xvsqrtsp_2 =		"f000022cQ-Q",
+  xvsubsp_3 =		"f0000240QQQ",
+  xvmaddmsp_3 =		"f0000248QQQ",
+  xvcmpgtsp_3 =		"f0000258QQQ",
+  xvcvspsxws_2 =	"f0000260Q-Q",
+  xvrspiz_2 =		"f0000264Q-Q",
+  xvresp_2 =		"f0000268Q-Q",
+  xvmulsp_3 =		"f0000280QQQ",
+  xvmsubasp_3 =		"f0000288QQQ",
+  xxspltw_3 =		"f0000290QQg~",
+  xvcmpgesp_3 =		"f0000298QQQ",
+  xvcvuxwsp_2 =		"f00002a0Q-Q",
+  xvrspip_2 =		"f00002a4Q-Q",
+  xvtsqrtsp_2 =		"f00002a8X-Q",
+  xvrspic_2 =		"f00002acQ-Q",
+  xvdivsp_3 =		"f00002c0QQQ",
+  xvmsubmsp_3 =		"f00002c8QQQ",
+  xvcvsxwsp_2 =		"f00002e0Q-Q",
+  xvrspim_2 =		"f00002e4Q-Q",
+  xvtdivsp_3 =		"f00002e8XQQ",
+  xvadddp_3 =		"f0000300QQQ",
+  xvmaddadp_3 =		"f0000308QQQ",
+  xvcmpeqdp_3 =		"f0000318QQQ",
+  xvcvdpuxws_2 =	"f0000320Q-Q",
+  xvrdpi_2 =		"f0000324Q-Q",
+  xvrsqrtedp_2 =	"f0000328Q-Q",
+  xvsqrtdp_2 =		"f000032cQ-Q",
+  xvsubdp_3 =		"f0000340QQQ",
+  xvmaddmdp_3 =		"f0000348QQQ",
+  xvcmpgtdp_3 =		"f0000358QQQ",
+  xvcvdpsxws_2 =	"f0000360Q-Q",
+  xvrdpiz_2 =		"f0000364Q-Q",
+  xvredp_2 =		"f0000368Q-Q",
+  xvmuldp_3 =		"f0000380QQQ",
+  xvmsubadp_3 =		"f0000388QQQ",
+  xvcmpgedp_3 =		"f0000398QQQ",
+  xvcvuxwdp_2 =		"f00003a0Q-Q",
+  xvrdpip_2 =		"f00003a4Q-Q",
+  xvtsqrtdp_2 =		"f00003a8X-Q",
+  xvrdpic_2 =		"f00003acQ-Q",
+  xvdivdp_3 =		"f00003c0QQQ",
+  xvmsubmdp_3 =		"f00003c8QQQ",
+  xvcvsxwdp_2 =		"f00003e0Q-Q",
+  xvrdpim_2 =		"f00003e4Q-Q",
+  xvtdivdp_3 =		"f00003e8XQQ",
+  xsnmaddasp_3 =	"f0000408QQQ",
+  xxland_3 =		"f0000410QQQ",
+  xscvdpsp_2 =		"f0000424Q-Q",
+  xscvdpspn_2 =		"f000042cQ-Q",
+  xsnmaddmsp_3 =	"f0000448QQQ",
+  xxlandc_3 =		"f0000450QQQ",
+  xsrsp_2 =		"f0000464Q-Q",
+  xsnmsubasp_3 =	"f0000488QQQ",
+  xxlor_3 =		"f0000490QQQ",
+  xscvuxdsp_2 =		"f00004a0Q-Q",
+  xsnmsubmsp_3 =	"f00004c8QQQ",
+  xxlxor_3 =		"f00004d0QQQ",
+  xscvsxdsp_2 =		"f00004e0Q-Q",
+  xsmaxdp_3 =		"f0000500QQQ",
+  xsnmaddadp_3 =	"f0000508QQQ",
+  xxlnor_3 =		"f0000510QQQ",
+  xscvdpuxds_2 =	"f0000520Q-Q",
+  xscvspdp_2 =		"f0000524Q-Q",
+  xscvspdpn_2 =		"f000052cQ-Q",
+  xsmindp_3 =		"f0000540QQQ",
+  xsnmaddmdp_3 =	"f0000548QQQ",
+  xxlorc_3 =		"f0000550QQQ",
+  xscvdpsxds_2 =	"f0000560Q-Q",
+  xsabsdp_2 =		"f0000564Q-Q",
+  xscpsgndp_3 =		"f0000580QQQ",
+  xsnmsubadp_3 =	"f0000588QQQ",
+  xxlnand_3 =		"f0000590QQQ",
+  xscvuxddp_2 =		"f00005a0Q-Q",
+  xsnabsdp_2 =		"f00005a4Q-Q",
+  xsnmsubmdp_3 =	"f00005c8QQQ",
+  xxleqv_3 =		"f00005d0QQQ",
+  xscvsxddp_2 =		"f00005e0Q-Q",
+  xsnegdp_2 =		"f00005e4Q-Q",
+  xvmaxsp_3 =		"f0000600QQQ",
+  xvnmaddasp_3 =	"f0000608QQQ",
+  ["xvcmpeqsp._3"] =	"f0000618QQQ",
+  xvcvspuxds_2 =	"f0000620Q-Q",
+  xvcvdpsp_2 =		"f0000624Q-Q",
+  xvminsp_3 =		"f0000640QQQ",
+  xvnmaddmsp_3 =	"f0000648QQQ",
+  ["xvcmpgtsp._3"] =	"f0000658QQQ",
+  xvcvspsxds_2 =	"f0000660Q-Q",
+  xvabssp_2 =		"f0000664Q-Q",
+  xvcpsgnsp_3 =		"f0000680QQQ",
+  xvnmsubasp_3 =	"f0000688QQQ",
+  ["xvcmpgesp._3"] =	"f0000698QQQ",
+  xvcvuxdsp_2 =		"f00006a0Q-Q",
+  xvnabssp_2 =		"f00006a4Q-Q",
+  xvnmsubmsp_3 =	"f00006c8QQQ",
+  xvcvsxdsp_2 =		"f00006e0Q-Q",
+  xvnegsp_2 =		"f00006e4Q-Q",
+  xvmaxdp_3 =		"f0000700QQQ",
+  xvnmaddadp_3 =	"f0000708QQQ",
+  ["xvcmpeqdp._3"] =	"f0000718QQQ",
+  xvcvdpuxds_2 =	"f0000720Q-Q",
+  xvcvspdp_2 =		"f0000724Q-Q",
+  xvmindp_3 =		"f0000740QQQ",
+  xvnmaddmdp_3 =	"f0000748QQQ",
+  ["xvcmpgtdp._3"] =	"f0000758QQQ",
+  xvcvdpsxds_2 =	"f0000760Q-Q",
+  xvabsdp_2 =		"f0000764Q-Q",
+  xvcpsgndp_3 =		"f0000780QQQ",
+  xvnmsubadp_3 =	"f0000788QQQ",
+  ["xvcmpgedp._3"] =	"f0000798QQQ",
+  xvcvuxddp_2 =		"f00007a0Q-Q",
+  xvnabsdp_2 =		"f00007a4Q-Q",
+  xvnmsubmdp_3 =	"f00007c8QQQ",
+  xvcvsxddp_2 =		"f00007e0Q-Q",
+  xvnegdp_2 =		"f00007e4Q-Q",
+
+  -- Primary opcode 61:
+  stfdp_2 =	"f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+  -- Primary opcode 62:
+  stq_2 =	"f8000002R:D", -- NYI: displacement must be divisible by 8.
 
 
   -- Primary opcode 63:
   -- Primary opcode 63:
   fdiv_3 =	"fc000024FFF.",
   fdiv_3 =	"fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
   frsp_2 =	"fc000018F-F.",
   frsp_2 =	"fc000018F-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiwz_2 =	"fc00001eF-F.",
   fctiwz_2 =	"fc00001eF-F.",
+  ftdiv_2 =	"fc000100X-F.",
+  fctiwu_2 =	"fc00011cF-F.",
+  fctiwuz_2 =	"fc00011eF-F.",
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   fnabs_2 =	"fc000110F-F.",
   fnabs_2 =	"fc000110F-F.",
+  ftsqrt_2 =	"fc000140X-F.",
   fabs_2 =	"fc000210F-F.",
   fabs_2 =	"fc000210F-F.",
   frin_2 =	"fc000310F-F.",
   frin_2 =	"fc000310F-F.",
   friz_2 =	"fc000350F-F.",
   friz_2 =	"fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   fctid_2 =	"fc00065cF-F.",
   fctid_2 =	"fc00065cF-F.",
   fctidz_2 =	"fc00065eF-F.",
   fctidz_2 =	"fc00065eF-F.",
+  fmrgow_3 =	"fc00068cFFF",
   fcfid_2 =	"fc00069cF-F.",
   fcfid_2 =	"fc00069cF-F.",
+  fctidu_2 =	"fc00075cF-F.",
+  fctiduz_2 =	"fc00075eF-F.",
+  fmrgew_3 =	"fc00078cFFF",
+  fcfidu_2 =	"fc00079cF-F.",
+
+  daddq_3 =	"fc000004F:F:F:.",
+  dquaq_4 =	"fc000006F:F:F:Z.",
+  dmulq_3 =	"fc000044F:F:F:.",
+  drrndq_4 =	"fc000046F:F:F:Z.",
+  dscliq_3 =	"fc000084F:F:6.",
+  dquaiq_4 =	"fc000086SF:~F:Z.",
+  dscriq_3 =	"fc0000c4F:F:6.",
+  drintxq_4 =	"fc0000c61F:~F:Z.",
+  dcmpoq_3 =	"fc000104XF:F:",
+  dtstexq_3 =	"fc000144XF:F:",
+  dtstdcq_3 =	"fc000184XF:6",
+  dtstdgq_3 =	"fc0001c4XF:6",
+  drintnq_4 =	"fc0001c61F:~F:Z.",
+  dctqpq_2 =	"fc000204F:-F:.",
+  dctfixq_2 =	"fc000244F:-F:.",
+  ddedpdq_3 =	"fc000284ZF:~F:.",
+  dxexq_2 =	"fc0002c4F:-F:.",
+  dsubq_3 =	"fc000404F:F:F:.",
+  ddivq_3 =	"fc000444F:F:F:.",
+  dcmpuq_3 =	"fc000504XF:F:",
+  dtstsfq_3 =	"fc000544XF:F:",
+  drdpq_2 =	"fc000604F:-F:.",
+  dcffixq_2 =	"fc000644F:-F:.",
+  denbcdq_3 =	"fc000684YF:~F:.",
+  diexq_3 =	"fc0006c4F:FF:.",
 
 
   -- Primary opcode 4, SPE APU extension:
   -- Primary opcode 4, SPE APU extension:
   evaddw_3 =		"10000200RRR",
   evaddw_3 =		"10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
 do
 do
   local t = {}
   local t = {}
   for k,v in pairs(map_op) do
   for k,v in pairs(map_op) do
-    if sub(v, -1) == "." then
+    if type(v) == "string" and sub(v, -1) == "." then
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
     end
     end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
   werror("bad register name `"..expr.."'")
 end
 end
 
 
+-- Parse a vector register operand of the form "v0".."v31" and return its
+-- number. Anything else is reported through werror().
+local function parse_vr(expr)
+  local digits = match(expr, "^v([1-3]?[0-9])$")
+  local regno = digits and tonumber(digits)
+  -- The pattern also admits v32..v39, so the upper bound is checked here.
+  if regno and regno <= 31 then return regno end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a vector-scalar register operand of the form "vs0".."vs63" and
+-- return its number. Anything else is reported through werror().
+local function parse_vs(expr)
+  local digits = match(expr, "^vs([1-6]?[0-9])$")
+  local regno = digits and tonumber(digits)
+  -- The pattern also admits vs64..vs69, so the upper bound is checked here.
+  if regno and regno <= 63 then return regno end
+  werror("bad register name `"..expr.."'")
+end
+
 local function parse_cr(expr)
 local function parse_cr(expr)
   local r = match(expr, "^cr([0-7])$")
   local r = match(expr, "^cr([0-7])$")
   if r then return tonumber(r) end
   if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
   werror("bad condition bit name `"..expr.."'")
   werror("bad condition bit name `"..expr.."'")
 end
 end
 
 
+-- Environment table handed to chunks compiled by loadenv().
+local parse_ctx = {}
+
+-- Compile the source string `s` into a chunk whose environment is parse_ctx.
+-- Two variants are needed: when setfenv exists the chunk is compiled with
+-- loadstring and the environment is attached afterwards; otherwise load() is
+-- given the environment directly as its fourth argument.
+local loadenv
+if setfenv then
+  loadenv = function(s)
+    local chunk = loadstring(s, "")
+    if chunk then
+      setfenv(chunk, parse_ctx)
+    end
+    return chunk
+  end
+else
+  loadenv = function(s)
+    return load(s, "", nil, parse_ctx)
+  end
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+-- (The alias handlers rewrite operands into strings such as "32-(5)".)
+--   n: operand text.
+-- Returns the numeric value of `n`, or nil when `n` is neither a plain
+-- number nor an expression that evaluates to a number. A nil result lets
+-- the caller fall back to its non-constant operand handling.
+local function parse_number(n)
+  local x = tonumber(n)
+  if x then return x end
+  -- Not a plain literal: compile it as an expression via loadenv() and
+  -- evaluate it under pcall so that runtime errors just mean "not constant".
+  local code = loadenv("return "..n)
+  if code then
+    local ok, y = pcall(code)
+    -- Only accept genuine numbers. A string, boolean or table result would
+    -- otherwise reach the callers' bit operations as if it were an integer.
+    if ok and type(y) == "number" then return y end
+  end
+  return nil
+end
+
 local function parse_imm(imm, bits, shift, scale, signed)
 local function parse_imm(imm, bits, shift, scale, signed)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
   if n then
     local m = sar(n, scale)
     local m = sar(n, scale)
     if shl(m, scale) == n then
     if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
       end
       end
     end
     end
     werror("out of range immediate `"..imm.."'")
     werror("out of range immediate `"..imm.."'")
-  elseif match(imm, "^r([1-3]?[0-9])$") or
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+	 match(imm, "^vs([1-6]?[0-9])$") or
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
 end
 end
 
 
 local function parse_shiftmask(imm, isshift)
 local function parse_shiftmask(imm, isshift)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
   if n then
     if shr(n, 6) == 0 then
     if shr(n, 6) == 0 then
-      local lsb = band(imm, 31)
-      local msb = imm - lsb
+      local lsb = band(n, 31)
+      local msb = n - lsb
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
     end
     end
     werror("out of range immediate `"..imm.."'")
     werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
-    werror("NYI: parameterized 64 bit shift/mask")
+    waction("IMMSH", isshift and 1 or 0, imm)
+    return 0;
   end
   end
 end
 end
 
 
@@ -1011,7 +1648,7 @@ end
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Handle opcodes defined with template strings.
 -- Handle opcodes defined with template strings.
-map_op[".template__"] = function(params, template, nparams)
+op_template = function(params, template, nparams)
   if not params then return sub(template, 9) end
   if not params then return sub(template, 9) end
   local op = tonumber(sub(template, 1, 8), 16)
   local op = tonumber(sub(template, 1, 8), 16)
   local n, rs = 1, 26
   local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
     elseif p == "F" then
     elseif p == "F" then
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+    elseif p == "V" then
+      rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+    elseif p == "Q" then
+      local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+      local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+      op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+    elseif p == "q" then
+      local vs = parse_vs(params[n]); n = n + 1
+      op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
     elseif p == "A" then
     elseif p == "A" then
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
     elseif p == "S" then
     elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
     elseif p == "X" then
     elseif p == "X" then
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+    elseif p == "1" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+    elseif p == "g" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+    elseif p == "3" then
+      rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+    elseif p == "P" then
+      rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "p" then
+      op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "6" then
+      rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+    elseif p == "Y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+    elseif p == "y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+    elseif p == "Z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+    elseif p == "z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
     elseif p == "W" then
     elseif p == "W" then
       op = op + parse_cr(params[n]); n = n + 1
       op = op + parse_cr(params[n]); n = n + 1
     elseif p == "G" then
     elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
       local lo = band(op, mm)
       local lo = band(op, mm)
       local hi = band(op, shl(mm, 5))
       local hi = band(op, shl(mm, 5))
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+    elseif p == ":" then
+      if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
     elseif p == "-" then
     elseif p == "-" then
       rs = rs - 5
       rs = rs - 5
     elseif p == "." then
     elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
   wputpos(pos, op)
   wputpos(pos, op)
 end
 end
 
 
+map_op[".template__"] = op_template
+
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Pseudo-opcode to mark the position where the action list is to be emitted.
 -- Pseudo-opcode to mark the position where the action list is to be emitted.

+ 3 - 3
jni/LuaJIT-2.0.1/dynasm/dasm_proto.h → jni/LuaJIT-2.1/dynasm/dasm_proto.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM encoding engine prototypes.
 ** DynASM encoding engine prototypes.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -10,8 +10,8 @@
 #include <stddef.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <stdarg.h>
 
 
-#define DASM_IDENT	"DynASM 1.3.0"
-#define DASM_VERSION	10300	/* 1.3.0 */
+#define DASM_IDENT	"DynASM 1.4.0"
+#define DASM_VERSION	10400	/* 1.4.0 */
 
 
 #ifndef Dst_DECL
 #ifndef Dst_DECL
 #define Dst_DECL	dasm_State **Dst
 #define Dst_DECL	dasm_State **Dst

+ 1 - 1
jni/LuaJIT-2.0.1/dynasm/dasm_x64.lua → jni/LuaJIT-2.1/dynasm/dasm_x64.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM x64 module.
 -- DynASM x64 module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- This module just sets 64 bit mode for the combined x86/x64 module.
 -- This module just sets 64 bit mode for the combined x86/x64 module.

+ 9 - 3
jni/LuaJIT-2.0.1/dynasm/dasm_x86.h → jni/LuaJIT-2.1/dynasm/dasm_x86.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM x86 encoding engine.
 ** DynASM x86 encoding engine.
-** Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -213,7 +213,8 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_REL_LG:
       case DASM_REL_LG:
       case DASM_IMM_LG:
       case DASM_IMM_LG:
 	n = *p++; pl = D->lglabels + n;
 	n = *p++; pl = D->lglabels + n;
-	if (n <= 246) { CKPL(lg, LG); goto putrel; }  /* Bkwd rel or global. */
+	/* Bkwd rel or global. */
+	if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
 	pl -= 246; n = *pl;
 	pl -= 246; n = *pl;
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
 	goto linkrel;
 	goto linkrel;
@@ -390,7 +391,12 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
 	case DASM_IMM_W: dasmw(n); break;
 	case DASM_IMM_W: dasmw(n); break;
-	case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+	case DASM_VREG: {
+	  int t = *p++;
+	  if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3;
+	  cp[-1] ^= n;
+	  break;
+	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
 	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
 	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */

+ 366 - 80
jni/LuaJIT-2.0.1/dynasm/dasm_x86.lua → jni/LuaJIT-2.1/dynasm/dasm_x86.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM x86/x64 module.
 -- DynASM x86/x64 module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -11,9 +11,9 @@ local x64 = x64
 local _info = {
 local _info = {
   arch =	x64 and "x64" or "x86",
   arch =	x64 and "x64" or "x86",
   description =	"DynASM x86/x64 module",
   description =	"DynASM x86/x64 module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
 local _s = string
 local _s = string
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
-local concat, sort = table.concat, table.sort
+local concat, sort, remove = table.concat, table.sort, table.remove
 local bit = bit or require("bit")
 local bit = bit or require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
 
 
 -- Inherited tables and callbacks.
 -- Inherited tables and callbacks.
 local g_opt, g_arch
 local g_opt, g_arch
@@ -299,7 +299,7 @@ local function mkrmap(sz, cl, names)
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     if needrex then map_reg_needrex[iname] = true end
     if needrex then map_reg_needrex[iname] = true end
     local name
     local name
-    if sz == "o" then name = format("xmm%d", i)
+    if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
     elseif sz == "f" then name = format("st%d", i)
     elseif sz == "f" then name = format("st%d", i)
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     map_archdef[name] = iname
     map_archdef[name] = iname
@@ -334,21 +334,24 @@ mkrmap("f", "Rf")
 -- SSE registers (oword sized, but qword and dword accessible).
 -- SSE registers (oword sized, but qword and dword accessible).
 mkrmap("o", "xmm")
 mkrmap("o", "xmm")
 
 
+-- AVX registers (yword sized, but oword, qword and dword accessible).
+mkrmap("y", "ymm")
+
 -- Operand size prefixes to codes.
 -- Operand size prefixes to codes.
 local map_opsize = {
 local map_opsize = {
-  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
-  aword = addrsize,
+  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
+  tword = "t", aword = addrsize,
 }
 }
 
 
 -- Operand size code to number.
 -- Operand size code to number.
 local map_opsizenum = {
 local map_opsizenum = {
-  b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
 }
 }
 
 
 -- Operand size code to name.
 -- Operand size code to name.
 local map_opsizename = {
 local map_opsizename = {
-  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
-  f = "fpword",
+  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
+  t = "tword", f = "fpword",
 }
 }
 
 
 -- Valid index register scale factors.
 -- Valid index register scale factors.
@@ -460,7 +463,29 @@ local function wputszarg(sz, n)
 end
 end
 
 
 -- Put multi-byte opcode with operand-size dependent modifications.
 -- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex)
+local function wputop(sz, op, rex, vex)
+  if vex then
+    local tail
+    if vex.m == 1 and band(rex, 11) == 0 then
+      wputb(0xc5)
+      tail = shl(bxor(band(rex, 4), 4), 5)
+    else
+      wputb(0xc4)
+      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
+      tail = shl(band(rex, 8), 4)
+    end
+    local reg, vreg = 0, nil
+    if vex.v then
+      reg = vex.v.reg
+      if not reg then werror("bad vex operand") end
+      if reg < 0 then reg = 0; vreg = vex.v.vreg end
+    end
+    if sz == "y" or vex.l then tail = tail + 4 end
+    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
+    if vreg then waction("VREG", vreg); wputxb(4) end
+    rex = 0
+    if op >= 256 then werror("bad vex opcode") end
+  end
   local r
   local r
   if rex ~= 0 and not x64 then werror("bad operand size") end
   if rex ~= 0 and not x64 then werror("bad operand size") end
   if sz == "w" then wputb(102) end
   if sz == "w" then wputb(102) end
@@ -881,9 +906,15 @@ end
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --             The spare 3 bits are either filled with the last hex digit or
 --             The spare 3 bits are either filled with the last hex digit or
 --             the result from a previous "r"/"R". The opcode is restored.
 --             the result from a previous "r"/"R". The opcode is restored.
+--   "u"       Use VEX encoding, vvvv unused.
+--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
+--             removed from the list used by future characters).
+--   "L"       Force VEX.L
 --
 --
 -- All of the following characters force a flush of the opcode:
 -- All of the following characters force a flush of the opcode:
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+--   "s"       stores a 4 bit immediate from the last register operand,
+--             followed by 4 zero bits.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
@@ -1040,7 +1071,7 @@ local map_op = {
   -- ED: *in Rdw,dx
   -- ED: *in Rdw,dx
   -- EE: *out dx,Rb
   -- EE: *out dx,Rb
   -- EF: *out dx,Rdw
   -- EF: *out dx,Rdw
-  -- F0: *lock
+  lock_0 =	"F0",
   int1_0 =	"F1",
   int1_0 =	"F1",
   repne_0 =	"F2",
   repne_0 =	"F2",
   repnz_0 =	"F2",
   repnz_0 =	"F2",
@@ -1081,7 +1112,11 @@ local map_op = {
   btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
   btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
   bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
   bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
 
 
+  shld_3 =	"mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
+  shrd_3 =	"mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
+
   rdtsc_0 =	"0F31", -- P1+
   rdtsc_0 =	"0F31", -- P1+
+  rdpmc_0 =	"0F33", -- P6+
   cpuid_0 =	"0FA2", -- P1+
   cpuid_0 =	"0FA2", -- P1+
 
 
   -- floating point ops
   -- floating point ops
@@ -1114,6 +1149,9 @@ local map_op = {
   fucompp_0 =	"DAE9",
   fucompp_0 =	"DAE9",
   fcompp_0 =	"DED9",
   fcompp_0 =	"DED9",
 
 
+  fldenv_1 =	"x.:D94m",
+  fnstenv_1 =	"x.:D96m",
+  fstenv_1 =	"x.:9BD96m",
   fldcw_1 =	"xw:nD95m",
   fldcw_1 =	"xw:nD95m",
   fstcw_1 =	"xw:n9BD97m",
   fstcw_1 =	"xw:n9BD97m",
   fnstcw_1 =	"xw:nD97m",
   fnstcw_1 =	"xw:nD97m",
@@ -1184,11 +1222,13 @@ local map_op = {
   cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
   cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
   cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
   cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
   cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
   cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
-  cvtss2si_2 =	"rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
+  cvtss2si_2 =	"rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
   cvttpd2dq_2 =	"rmo:660FE6rM",
   cvttpd2dq_2 =	"rmo:660FE6rM",
   cvttps2dq_2 =	"rmo:F30F5BrM",
   cvttps2dq_2 =	"rmo:F30F5BrM",
   cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
   cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
   cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
   cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
+  fxsave_1 =	"x.:0FAE0m",
+  fxrstor_1 =	"x.:0FAE1m",
   ldmxcsr_1 =	"xd:0FAE2m",
   ldmxcsr_1 =	"xd:0FAE2m",
   lfence_0 =	"0FAEE8",
   lfence_0 =	"0FAEE8",
   maskmovdqu_2 = "rro:660FF7rM",
   maskmovdqu_2 = "rro:660FF7rM",
@@ -1217,46 +1257,14 @@ local map_op = {
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   orpd_2 =	"rmo:660F56rM",
   orpd_2 =	"rmo:660F56rM",
   orps_2 =	"rmo:0F56rM",
   orps_2 =	"rmo:0F56rM",
-  packssdw_2 =	"rmo:660F6BrM",
-  packsswb_2 =	"rmo:660F63rM",
-  packuswb_2 =	"rmo:660F67rM",
-  paddb_2 =	"rmo:660FFCrM",
-  paddd_2 =	"rmo:660FFErM",
-  paddq_2 =	"rmo:660FD4rM",
-  paddsb_2 =	"rmo:660FECrM",
-  paddsw_2 =	"rmo:660FEDrM",
-  paddusb_2 =	"rmo:660FDCrM",
-  paddusw_2 =	"rmo:660FDDrM",
-  paddw_2 =	"rmo:660FFDrM",
-  pand_2 =	"rmo:660FDBrM",
-  pandn_2 =	"rmo:660FDFrM",
   pause_0 =	"F390",
   pause_0 =	"F390",
-  pavgb_2 =	"rmo:660FE0rM",
-  pavgw_2 =	"rmo:660FE3rM",
-  pcmpeqb_2 =	"rmo:660F74rM",
-  pcmpeqd_2 =	"rmo:660F76rM",
-  pcmpeqw_2 =	"rmo:660F75rM",
-  pcmpgtb_2 =	"rmo:660F64rM",
-  pcmpgtd_2 =	"rmo:660F66rM",
-  pcmpgtw_2 =	"rmo:660F65rM",
-  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
+  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
-  pmaddwd_2 =	"rmo:660FF5rM",
-  pmaxsw_2 =	"rmo:660FEErM",
-  pmaxub_2 =	"rmo:660FDErM",
-  pminsw_2 =	"rmo:660FEArM",
-  pminub_2 =	"rmo:660FDArM",
   pmovmskb_2 =	"rr/do:660FD7rM",
   pmovmskb_2 =	"rr/do:660FD7rM",
-  pmulhuw_2 =	"rmo:660FE4rM",
-  pmulhw_2 =	"rmo:660FE5rM",
-  pmullw_2 =	"rmo:660FD5rM",
-  pmuludq_2 =	"rmo:660FF4rM",
-  por_2 =	"rmo:660FEBrM",
   prefetchnta_1 = "xb:n0F180m",
   prefetchnta_1 = "xb:n0F180m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht2_1 = "xb:n0F183m",
   prefetcht2_1 = "xb:n0F183m",
-  psadbw_2 =	"rmo:660FF6rM",
   pshufd_3 =	"rmio:660F70rMU",
   pshufd_3 =	"rmio:660F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
@@ -1270,23 +1278,6 @@ local map_op = {
   psrldq_2 =	"rio:660F733mU",
   psrldq_2 =	"rio:660F733mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
-  psubb_2 =	"rmo:660FF8rM",
-  psubd_2 =	"rmo:660FFArM",
-  psubq_2 =	"rmo:660FFBrM",
-  psubsb_2 =	"rmo:660FE8rM",
-  psubsw_2 =	"rmo:660FE9rM",
-  psubusb_2 =	"rmo:660FD8rM",
-  psubusw_2 =	"rmo:660FD9rM",
-  psubw_2 =	"rmo:660FF9rM",
-  punpckhbw_2 =	"rmo:660F68rM",
-  punpckhdq_2 =	"rmo:660F6ArM",
-  punpckhqdq_2 = "rmo:660F6DrM",
-  punpckhwd_2 =	"rmo:660F69rM",
-  punpcklbw_2 =	"rmo:660F60rM",
-  punpckldq_2 =	"rmo:660F62rM",
-  punpcklqdq_2 = "rmo:660F6CrM",
-  punpcklwd_2 =	"rmo:660F61rM",
-  pxor_2 =	"rmo:660FEFrM",
   rcpps_2 =	"rmo:0F53rM",
   rcpps_2 =	"rmo:0F53rM",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rsqrtps_2 =	"rmo:0F52rM",
   rsqrtps_2 =	"rmo:0F52rM",
@@ -1344,7 +1335,7 @@ local map_op = {
   dpps_3 =	"rmio:660F3A40rMU",
   dpps_3 =	"rmio:660F3A40rMU",
   extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
   extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
   insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
   insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
-  movntdqa_2 =	"rmo:660F382ArM",
+  movntdqa_2 =	"rxo:660F382ArM",
   mpsadbw_3 =	"rmio:660F3A42rMU",
   mpsadbw_3 =	"rmio:660F3A42rMU",
   packusdw_2 =	"rmo:660F382BrM",
   packusdw_2 =	"rmo:660F382BrM",
   pblendvb_3 =	"rmRo:660F3810rM",
   pblendvb_3 =	"rmRo:660F3810rM",
@@ -1404,6 +1395,232 @@ local map_op = {
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   -- popcnt is also in SSE4.2
   -- popcnt is also in SSE4.2
+
+  -- AES-NI
+  aesdec_2 =	"rmo:660F38DErM",
+  aesdeclast_2 = "rmo:660F38DFrM",
+  aesenc_2 =	"rmo:660F38DCrM",
+  aesenclast_2 = "rmo:660F38DDrM",
+  aesimc_2 =	"rmo:660F38DBrM",
+  aeskeygenassist_3 = "rmio:660F3ADFrMU",
+  pclmulqdq_3 =	"rmio:660F3A44rMU",
+
+   -- AVX FP ops
+  vaddsubpd_3 =	"rrmoy:660FVD0rM",
+  vaddsubps_3 =	"rrmoy:F20FVD0rM",
+  vandpd_3 =	"rrmoy:660FV54rM",
+  vandps_3 =	"rrmoy:0FV54rM",
+  vandnpd_3 =	"rrmoy:660FV55rM",
+  vandnps_3 =	"rrmoy:0FV55rM",
+  vblendpd_4 =	"rrmioy:660F3AV0DrMU",
+  vblendps_4 =	"rrmioy:660F3AV0CrMU",
+  vblendvpd_4 =	"rrmroy:660F3AV4BrMs",
+  vblendvps_4 =	"rrmroy:660F3AV4ArMs",
+  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
+  vcmppd_4 =	"rrmioy:660FVC2rMU",
+  vcmpps_4 =	"rrmioy:0FVC2rMU",
+  vcmpsd_4 =	"rrrio:F20FVC2rMU|rrxi/ooq:",
+  vcmpss_4 =	"rrrio:F30FVC2rMU|rrxi/ood:",
+  vcomisd_2 =	"rro:660Fu2FrM|rx/oq:",
+  vcomiss_2 =	"rro:0Fu2FrM|rx/od:",
+  vcvtdq2pd_2 =	"rro:F30FuE6rM|rx/oq:|rm/yo:",
+  vcvtdq2ps_2 =	"rmoy:0Fu5BrM",
+  vcvtpd2dq_2 =	"rmoy:F20FuE6rM",
+  vcvtpd2ps_2 =	"rmoy:660Fu5ArM",
+  vcvtps2dq_2 =	"rmoy:660Fu5BrM",
+  vcvtps2pd_2 =	"rro:0Fu5ArM|rx/oq:|rm/yo:",
+  vcvtsd2si_2 =	"rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
+  vcvtsd2ss_3 =	"rrro:F20FV5ArM|rrx/ooq:",
+  vcvtsi2sd_3 =	"rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
+  vcvtsi2ss_3 =	"rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
+  vcvtss2sd_3 =	"rrro:F30FV5ArM|rrx/ood:",
+  vcvtss2si_2 =	"rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
+  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
+  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
+  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
+  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
+  vdppd_4 =	"rrmio:660F3AV41rMU",
+  vdpps_4 =	"rrmioy:660F3AV40rMU",
+  vextractf128_3 = "mri/oy:660F3AuL19RmU",
+  vextractps_3 = "mri/do:660F3Au17RmU",
+  vhaddpd_3 =	"rrmoy:660FV7CrM",
+  vhaddps_3 =	"rrmoy:F20FV7CrM",
+  vhsubpd_3 =	"rrmoy:660FV7DrM",
+  vhsubps_3 =	"rrmoy:F20FV7DrM",
+  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
+  vinsertps_4 =	"rrrio:660F3AV21rMU|rrxi/ood:",
+  vldmxcsr_1 =	"xd:0FuAE2m",
+  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
+  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
+  vmovapd_2 =	"rmoy:660Fu28rM|mroy:660Fu29Rm",
+  vmovaps_2 =	"rmoy:0Fu28rM|mroy:0Fu29Rm",
+  vmovd_2 =	"rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
+  vmovq_2 =	"rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
+  vmovddup_2 =	"rmy:F20Fu12rM|rro:|rx/oq:",
+  vmovhlps_3 =	"rrro:0FV12rM",
+  vmovhpd_2 =	"xr/qo:660Fu17Rm",
+  vmovhpd_3 =	"rrx/ooq:660FV16rM",
+  vmovhps_2 =	"xr/qo:0Fu17Rm",
+  vmovhps_3 =	"rrx/ooq:0FV16rM",
+  vmovlhps_3 =	"rrro:0FV16rM",
+  vmovlpd_2 =	"xr/qo:660Fu13Rm",
+  vmovlpd_3 =	"rrx/ooq:660FV12rM",
+  vmovlps_2 =	"xr/qo:0Fu13Rm",
+  vmovlps_3 =	"rrx/ooq:0FV12rM",
+  vmovmskpd_2 =	"rr/do:660Fu50rM|rr/dy:660FuL50rM",
+  vmovmskps_2 =	"rr/do:0Fu50rM|rr/dy:0FuL50rM",
+  vmovntpd_2 =	"xroy:660Fu2BRm",
+  vmovntps_2 =	"xroy:0Fu2BRm",
+  vmovsd_2 =	"rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
+  vmovsd_3 =	"rrro:F20FV10rM",
+  vmovshdup_2 =	"rmoy:F30Fu16rM",
+  vmovsldup_2 =	"rmoy:F30Fu12rM",
+  vmovss_2 =	"rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
+  vmovss_3 =	"rrro:F30FV10rM",
+  vmovupd_2 =	"rmoy:660Fu10rM|mroy:660Fu11Rm",
+  vmovups_2 =	"rmoy:0Fu10rM|mroy:0Fu11Rm",
+  vorpd_3 =	"rrmoy:660FV56rM",
+  vorps_3 =	"rrmoy:0FV56rM",
+  vpermilpd_3 =	"rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
+  vpermilps_3 =	"rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
+  vperm2f128_4 = "rrmiy:660F3AV06rMU",
+  vptestpd_2 =	"rmoy:660F38u0FrM",
+  vptestps_2 =	"rmoy:660F38u0ErM",
+  vrcpps_2 =	"rmoy:0Fu53rM",
+  vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
+  vrsqrtps_2 =	"rmoy:0Fu52rM",
+  vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
+  vroundpd_3 =	"rmioy:660F3AV09rMU",
+  vroundps_3 =	"rmioy:660F3AV08rMU",
+  vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
+  vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
+  vshufpd_4 =	"rrmioy:660FVC6rMU",
+  vshufps_4 =	"rrmioy:0FVC6rMU",
+  vsqrtps_2 =	"rmoy:0Fu51rM",
+  vsqrtss_2 =	"rro:F30Fu51rM|rx/od:",
+  vsqrtpd_2 =	"rmoy:660Fu51rM",
+  vsqrtsd_2 =	"rro:F20Fu51rM|rx/oq:",
+  vstmxcsr_1 =	"xd:0FuAE3m",
+  vucomisd_2 =	"rro:660Fu2ErM|rx/oq:",
+  vucomiss_2 =	"rro:0Fu2ErM|rx/od:",
+  vunpckhpd_3 =	"rrmoy:660FV15rM",
+  vunpckhps_3 =	"rrmoy:0FV15rM",
+  vunpcklpd_3 =	"rrmoy:660FV14rM",
+  vunpcklps_3 =	"rrmoy:0FV14rM",
+  vxorpd_3 =	"rrmoy:660FV57rM",
+  vxorps_3 =	"rrmoy:0FV57rM",
+  vzeroall_0 =	"0FuL77",
+  vzeroupper_0 = "0Fu77",
+
+  -- AVX2 FP ops
+  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
+  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
+  -- *vgather* (!vsib)
+  vpermpd_3 =	"rmiy:660F3AuX01rMU",
+  vpermps_3 =	"rrmy:660F38V16rM",
+
+  -- AVX, AVX2 integer ops
+  -- In general, xmm requires AVX, ymm requires AVX2.
+  vlddqu_2 =	"rxoy:F20FuF0rM",
+  vmaskmovdqu_2 = "rro:660FuF7rM",
+  vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
+  vmovdqu_2 =	"rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
+  vmovntdq_2 =	"xroy:660FuE7Rm",
+  vmovntdqa_2 =	"rxoy:660F38u2ArM",
+  vmpsadbw_4 =	"rrmioy:660F3AV42rMU",
+  vpabsb_2 =	"rmoy:660F38u1CrM",
+  vpabsd_2 =	"rmoy:660F38u1ErM",
+  vpabsw_2 =	"rmoy:660F38u1DrM",
+  vpackusdw_3 =	"rrmoy:660F38V2BrM",
+  vpalignr_4 =	"rrmioy:660F3AV0FrMU",
+  vpblendvb_4 =	"rrmroy:660F3AV4CrMs",
+  vpblendw_4 =	"rrmioy:660F3AV0ErMU",
+  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
+  vpcmpeqq_3 =	"rrmoy:660F38V29rM",
+  vpcmpestri_3 = "rmio:660F3Au61rMU",
+  vpcmpestrm_3 = "rmio:660F3Au60rMU",
+  vpcmpgtq_3 =	"rrmoy:660F38V37rM",
+  vpcmpistri_3 = "rmio:660F3Au63rMU",
+  vpcmpistrm_3 = "rmio:660F3Au62rMU",
+  vpextrb_3 =	"rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
+  vpextrw_3 =	"rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
+  vpextrd_3 =	"mri/do:660F3Au16RmU",
+  vpextrq_3 =	"mri/qo:660F3Au16RmU",
+  vphaddw_3 =	"rrmoy:660F38V01rM",
+  vphaddd_3 =	"rrmoy:660F38V02rM",
+  vphaddsw_3 =	"rrmoy:660F38V03rM",
+  vphminposuw_2 = "rmo:660F38u41rM",
+  vphsubw_3 =	"rrmoy:660F38V05rM",
+  vphsubd_3 =	"rrmoy:660F38V06rM",
+  vphsubsw_3 =	"rrmoy:660F38V07rM",
+  vpinsrb_4 =	"rrri/ood:660F3AV20rMU|rrxi/oob:",
+  vpinsrw_4 =	"rrri/ood:660FVC4rMU|rrxi/oow:",
+  vpinsrd_4 =	"rrmi/ood:660F3AV22rMU",
+  vpinsrq_4 =	"rrmi/ooq:660F3AVX22rMU",
+  vpmaddubsw_3 = "rrmoy:660F38V04rM",
+  vpmaxsb_3 =	"rrmoy:660F38V3CrM",
+  vpmaxsd_3 =	"rrmoy:660F38V3DrM",
+  vpmaxuw_3 =	"rrmoy:660F38V3ErM",
+  vpmaxud_3 =	"rrmoy:660F38V3FrM",
+  vpminsb_3 =	"rrmoy:660F38V38rM",
+  vpminsd_3 =	"rrmoy:660F38V39rM",
+  vpminuw_3 =	"rrmoy:660F38V3ArM",
+  vpminud_3 =	"rrmoy:660F38V3BrM",
+  vpmovmskb_2 =	"rr/do:660FuD7rM|rr/dy:660FuLD7rM",
+  vpmovsxbw_2 =	"rroy:660F38u20rM|rx/oq:|rx/yo:",
+  vpmovsxbd_2 =	"rroy:660F38u21rM|rx/od:|rx/yq:",
+  vpmovsxbq_2 =	"rroy:660F38u22rM|rx/ow:|rx/yd:",
+  vpmovsxwd_2 =	"rroy:660F38u23rM|rx/oq:|rx/yo:",
+  vpmovsxwq_2 =	"rroy:660F38u24rM|rx/od:|rx/yq:",
+  vpmovsxdq_2 =	"rroy:660F38u25rM|rx/oq:|rx/yo:",
+  vpmovzxbw_2 =	"rroy:660F38u30rM|rx/oq:|rx/yo:",
+  vpmovzxbd_2 =	"rroy:660F38u31rM|rx/od:|rx/yq:",
+  vpmovzxbq_2 =	"rroy:660F38u32rM|rx/ow:|rx/yd:",
+  vpmovzxwd_2 =	"rroy:660F38u33rM|rx/oq:|rx/yo:",
+  vpmovzxwq_2 =	"rroy:660F38u34rM|rx/od:|rx/yq:",
+  vpmovzxdq_2 =	"rroy:660F38u35rM|rx/oq:|rx/yo:",
+  vpmuldq_3 =	"rrmoy:660F38V28rM",
+  vpmulhrsw_3 =	"rrmoy:660F38V0BrM",
+  vpmulld_3 =	"rrmoy:660F38V40rM",
+  vpshufb_3 =	"rrmoy:660F38V00rM",
+  vpshufd_3 =	"rmioy:660Fu70rMU",
+  vpshufhw_3 =	"rmioy:F30Fu70rMU",
+  vpshuflw_3 =	"rmioy:F20Fu70rMU",
+  vpsignb_3 =	"rrmoy:660F38V08rM",
+  vpsignw_3 =	"rrmoy:660F38V09rM",
+  vpsignd_3 =	"rrmoy:660F38V0ArM",
+  vpslldq_3 =	"rrioy:660Fv737mU",
+  vpsllw_3 =	"rrmoy:660FVF1rM|rrioy:660Fv716mU",
+  vpslld_3 =	"rrmoy:660FVF2rM|rrioy:660Fv726mU",
+  vpsllq_3 =	"rrmoy:660FVF3rM|rrioy:660Fv736mU",
+  vpsraw_3 =	"rrmoy:660FVE1rM|rrioy:660Fv714mU",
+  vpsrad_3 =	"rrmoy:660FVE2rM|rrioy:660Fv724mU",
+  vpsrldq_3 =	"rrioy:660Fv733mU",
+  vpsrlw_3 =	"rrmoy:660FVD1rM|rrioy:660Fv712mU",
+  vpsrld_3 =	"rrmoy:660FVD2rM|rrioy:660Fv722mU",
+  vpsrlq_3 =	"rrmoy:660FVD3rM|rrioy:660Fv732mU",
+  vptest_2 =	"rmoy:660F38u17rM",
+
+  -- AVX2 integer ops
+  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
+  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
+  vextracti128_3 = "mri/oy:660F3AuL39RmU",
+  vpblendd_4 =	"rrmioy:660F3AV02rMU",
+  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
+  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
+  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
+  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
+  vpermd_3 =	"rrmy:660F38V36rM",
+  vpermq_3 =	"rmiy:660F3AuX00rMU",
+  -- *vpgather* (!vsib)
+  vperm2i128_4 = "rrmiy:660F3AV46rMU",
+  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
+  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
+  vpsllvd_3 =	"rrmoy:660F38V47rM",
+  vpsllvq_3 =	"rrmoy:660F38VX47rM",
+  vpsravd_3 =	"rrmoy:660F38V46rM",
+  vpsrlvd_3 =	"rrmoy:660F38V45rM",
+  vpsrlvq_3 =	"rrmoy:660F38VX45rM",
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -1454,28 +1671,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
 end
 end
 
 
--- SSE FP arithmetic ops.
+-- SSE / AVX FP arithmetic ops.
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 		     sub = 12, min = 13, div = 14, max = 15 } do
 		     sub = 12, min = 13, div = 14, max = 15 } do
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+  if n ~= 1 then
+    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
+    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
+    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
+    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
+  end
+end
+
+-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
+for name,n in pairs{
+  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
+  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
+  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
+  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
+  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
+  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
+  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
+  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
+  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
+  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
+  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
+  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
+  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
+  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
+  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
+} do
+  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
+  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
 end
 end
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local map_vexarg = { u = false, v = 1, V = 2 }
+
 -- Process pattern string.
 -- Process pattern string.
 local function dopattern(pat, args, sz, op, needrex)
 local function dopattern(pat, args, sz, op, needrex)
-  local digit, addin
+  local digit, addin, vex
   local opcode = 0
   local opcode = 0
   local szov = sz
   local szov = sz
   local narg = 1
   local narg = 1
   local rex = 0
   local rex = 0
 
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- Limit number of section buffer positions used by a single dasm_put().
-  -- A single opcode needs a maximum of 5 positions.
-  if secpos+5 > maxsecpos then wflush() end
+  -- A single opcode needs a maximum of 6 positions.
+  if secpos+6 > maxsecpos then wflush() end
 
 
   -- Process each character.
   -- Process each character.
   for c in gmatch(pat.."|", ".") do
   for c in gmatch(pat.."|", ".") do
@@ -1489,6 +1736,8 @@ local function dopattern(pat, args, sz, op, needrex)
       szov = nil
       szov = nil
     elseif c == "X" then	-- Force REX.W.
     elseif c == "X" then	-- Force REX.W.
       rex = 8
       rex = 8
+    elseif c == "L" then	-- Force VEX.L.
+      vex.l = true
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       if narg < 2 then narg = 2 end
       if narg < 2 then narg = 2 end
@@ -1512,21 +1761,41 @@ local function dopattern(pat, args, sz, op, needrex)
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if s > 7 then rex = rex + 4 end
       if s > 7 then rex = rex + 4 end
       if needrex then rex = rex + 16 end
       if needrex then rex = rex + 16 end
-      wputop(szov, opcode, rex); opcode = nil
+      wputop(szov, opcode, rex, vex); opcode = nil
       local imark = sub(pat, -1) -- Force a mark (ugly).
       local imark = sub(pat, -1) -- Force a mark (ugly).
       -- Put ModRM/SIB with regno/last digit as spare.
       -- Put ModRM/SIB with regno/last digit as spare.
       wputmrmsib(t, imark, s, addin and addin.vreg)
       wputmrmsib(t, imark, s, addin and addin.vreg)
       addin = nil
       addin = nil
+    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
+      local b = band(opcode, 255); opcode = shr(opcode, 8)
+      local m = 1
+      if b == 0x38 then m = 2
+      elseif b == 0x3a then m = 3 end
+      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
+      if b ~= 0x0f then
+	werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
+	  "' in pattern `"..pat.."' for `"..op.."'")
+      end
+      local v = map_vexarg[c]
+      if v then v = remove(args, v) end
+      b = band(opcode, 255)
+      local p = 0
+      if b == 0x66 then p = 1
+      elseif b == 0xf3 then p = 2
+      elseif b == 0xf2 then p = 3 end
+      if p ~= 0 then opcode = shr(opcode, 8) end
+      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
+      vex = { m = m, p = p, v = v }
     else
     else
       if opcode then -- Flush opcode.
       if opcode then -- Flush opcode.
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if needrex then rex = rex + 16 end
 	if needrex then rex = rex + 16 end
 	if addin and addin.reg == -1 then
 	if addin and addin.reg == -1 then
-	  wputop(szov, opcode - 7, rex)
+	  wputop(szov, opcode - 7, rex, vex)
 	  waction("VREG", addin.vreg); wputxb(0)
 	  waction("VREG", addin.vreg); wputxb(0)
 	else
 	else
 	  if addin and addin.reg > 7 then rex = rex + 1 end
 	  if addin and addin.reg > 7 then rex = rex + 1 end
-	  wputop(szov, opcode, rex)
+	  wputop(szov, opcode, rex, vex)
 	end
 	end
 	opcode = nil
 	opcode = nil
       end
       end
@@ -1563,6 +1832,14 @@ local function dopattern(pat, args, sz, op, needrex)
 	  else
 	  else
 	    wputlabel("REL_", imm, 2)
 	    wputlabel("REL_", imm, 2)
 	  end
 	  end
+	elseif c == "s" then
+	  local reg = a.reg
+	  if reg < 0 then
+	    wputb(0)
+	    waction("VREG", a.vreg); wputxb(5)
+	  else
+	    wputb(shl(reg, 4))
+	  end
 	else
 	else
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	end
 	end
@@ -1639,11 +1916,14 @@ map_op[".template__"] = function(params, template, nparams)
     if pat == "" then pat = lastpat else lastpat = pat end
     if pat == "" then pat = lastpat else lastpat = pat end
     if matchtm(tm, args) then
     if matchtm(tm, args) then
       local prefix = sub(szm, 1, 1)
       local prefix = sub(szm, 1, 1)
-      if prefix == "/" then -- Match both operand sizes.
-	if args[1].opsize == sub(szm, 2, 2) and
-	   args[2].opsize == sub(szm, 3, 3) then
-	  dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
-	  return
+      if prefix == "/" then -- Exactly match leading operand sizes.
+	for i = #szm,1,-1 do
+	  if i == 1 then
+	    dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+	    return
+	  elseif args[i-1].opsize ~= sub(szm, i, i) then
+	    break
+	  end
 	end
 	end
       else -- Match common operand size.
       else -- Match common operand size.
 	local szp = sz
 	local szp = sz
@@ -1678,7 +1958,7 @@ if x64 then
   function map_op.mov64_2(params)
   function map_op.mov64_2(params)
     if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
     if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
     if secpos+2 > maxsecpos then wflush() end
     if secpos+2 > maxsecpos then wflush() end
-    local opcode, op64, sz, rex
+    local opcode, op64, sz, rex, vreg
     local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
     local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
     if op64 then
     if op64 then
       local a = parseoperand(params[2])
       local a = parseoperand(params[2])
@@ -1699,11 +1979,17 @@ if x64 then
 	  werror("bad operand mode")
 	  werror("bad operand mode")
 	end
 	end
 	op64 = params[2]
 	op64 = params[2]
-	opcode = 0xb8 + band(a.reg, 7) -- !x64: no VREG support.
+	if a.reg == -1 then
+	  vreg = a.vreg
+	  opcode = 0xb8
+	else
+	  opcode = 0xb8 + band(a.reg, 7)
+	end
 	rex = a.reg > 7 and 9 or 8
 	rex = a.reg > 7 and 9 or 8
       end
       end
     end
     end
     wputop(sz, opcode, rex)
     wputop(sz, opcode, rex)
+    if vreg then waction("VREG", vreg); wputxb(0) end
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
   end
   end

+ 10 - 11
jni/LuaJIT-2.0.1/dynasm/dynasm.lua → jni/LuaJIT-2.1/dynasm/dynasm.lua

@@ -2,7 +2,7 @@
 -- DynASM. A dynamic assembler for code generation engines.
 -- DynASM. A dynamic assembler for code generation engines.
 -- Originally designed and implemented for LuaJIT.
 -- Originally designed and implemented for LuaJIT.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See below for full copyright notice.
 -- See below for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -10,14 +10,14 @@
 local _info = {
 local _info = {
   name =	"DynASM",
   name =	"DynASM",
   description =	"A dynamic assembler for code generation engines",
   description =	"A dynamic assembler for code generation engines",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   url =		"http://luajit.org/dynasm.html",
   url =		"http://luajit.org/dynasm.html",
   license =	"MIT",
   license =	"MIT",
   copyright =	[[
   copyright =	[[
-Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 
 
 Permission is hereby granted, free of charge, to any person obtaining
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 a copy of this software and associated documentation files (the
@@ -85,7 +85,7 @@ end
 -- Resync CPP line numbers.
 -- Resync CPP line numbers.
 local function wsync()
 local function wsync()
   if g_synclineno ~= g_lineno and g_opt.cpp then
   if g_synclineno ~= g_lineno and g_opt.cpp then
-    wline("# "..g_lineno..' "'..g_fname..'"')
+    wline("#line "..g_lineno..' "'..g_fname..'"')
     g_synclineno = g_lineno
     g_synclineno = g_lineno
   end
   end
 end
 end
@@ -695,6 +695,9 @@ map_op[".arch_1"] = function(params)
   if not params then return "name" end
   if not params then return "name" end
   local err = loadarch(params[1])
   local err = loadarch(params[1])
   if err then wfatal(err) end
   if err then wfatal(err) end
+  wline(format("#if DASM_VERSION != %d", _info.vernum))
+  wline('#error "Version mismatch between DynASM and included encoding engine"')
+  wline("#endif")
 end
 end
 
 
 -- Dummy .arch pseudo-opcode to improve the error report.
 -- Dummy .arch pseudo-opcode to improve the error report.
@@ -877,13 +880,9 @@ local function dasmhead(out)
 ** DO NOT EDIT! The original file is in "%s".
 ** DO NOT EDIT! The original file is in "%s".
 */
 */
 
 
-#if DASM_VERSION != %d
-#error "Version mismatch between DynASM and included encoding engine"
-#endif
-
 ]], _info.url,
 ]], _info.url,
     _info.version, g_arch._info.arch, g_arch._info.version,
     _info.version, g_arch._info.arch, g_arch._info.version,
-    g_fname, _info.vernum))
+    g_fname))
 end
 end
 
 
 -- Read input file.
 -- Read input file.

+ 1 - 1
jni/LuaJIT-2.0.1/etc/luajit.1 → jni/LuaJIT-2.1/etc/luajit.1

@@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
 Runs some nested loops and shows the resulting traces.
 Runs some nested loops and shows the resulting traces.
 .SH COPYRIGHT
 .SH COPYRIGHT
 .PP
 .PP
-\fBLuaJIT\fR is Copyright \(co 2005-2013 Mike Pall.
+\fBLuaJIT\fR is Copyright \(co 2005-2015 Mike Pall.
 .br
 .br
 \fBLuaJIT\fR is open source software, released under the MIT license.
 \fBLuaJIT\fR is open source software, released under the MIT license.
 .SH SEE ALSO
 .SH SEE ALSO

+ 6 - 5
jni/LuaJIT-2.0.1/etc/luajit.pc → jni/LuaJIT-2.1/etc/luajit.pc

@@ -1,18 +1,19 @@
 # Package information for LuaJIT to be used by pkg-config.
 # Package information for LuaJIT to be used by pkg-config.
 majver=2
 majver=2
-minver=0
-relver=1
-version=${majver}.${minver}.${relver}
+minver=1
+relver=0
+version=${majver}.${minver}.${relver}-beta1
 abiver=5.1
 abiver=5.1
 
 
 prefix=/usr/local
 prefix=/usr/local
+multilib=lib
 exec_prefix=${prefix}
 exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
+libdir=${exec_prefix}/${multilib}
 libname=luajit-${abiver}
 libname=luajit-${abiver}
 includedir=${prefix}/include/luajit-${majver}.${minver}
 includedir=${prefix}/include/luajit-${majver}.${minver}
 
 
 INSTALL_LMOD=${prefix}/share/lua/${abiver}
 INSTALL_LMOD=${prefix}/share/lua/${abiver}
-INSTALL_CMOD=${prefix}/lib/lua/${abiver}
+INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver}
 
 
 Name: LuaJIT
 Name: LuaJIT
 Description: Just-in-time compiler for Lua
 Description: Just-in-time compiler for Lua

+ 1 - 1
jni/LuaJIT-2.0.1/src/.gitignore → jni/LuaJIT-2.1/src/.gitignore

@@ -4,4 +4,4 @@ lj_ffdef.h
 lj_libdef.h
 lj_libdef.h
 lj_recdef.h
 lj_recdef.h
 lj_folddef.h
 lj_folddef.h
-lj_vm.s
+lj_vm.[sS]

+ 90 - 59
jni/LuaJIT-2.0.1/src/Makefile → jni/LuaJIT-2.1/src/Makefile

@@ -7,11 +7,11 @@
 # Also works with MinGW and Cygwin on Windows.
 # Also works with MinGW and Cygwin on Windows.
 # Please check msvcbuild.bat for building with MSVC on Windows.
 # Please check msvcbuild.bat for building with MSVC on Windows.
 #
 #
-# Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 ##############################################################################
 
 
 MAJVER=  2
 MAJVER=  2
-MINVER=  0
+MINVER=  1
 RELVER=  0
 RELVER=  0
 ABIVER=  5.1
 ABIVER=  5.1
 NODOTABIVER= 51
 NODOTABIVER= 51
@@ -24,11 +24,13 @@ NODOTABIVER= 51
 # removing the '#' in front of them. Make sure you force a full recompile
 # removing the '#' in front of them. Make sure you force a full recompile
 # with "make clean", followed by "make" if you change any options.
 # with "make clean", followed by "make" if you change any options.
 #
 #
+DEFAULT_CC = gcc
+#
 # LuaJIT builds as a native 32 or 64 bit binary by default.
 # LuaJIT builds as a native 32 or 64 bit binary by default.
-CC= gcc
+CC= $(DEFAULT_CC)
 #
 #
 # Use this if you want to force a 32 bit build on a 64 bit multilib OS.
 # Use this if you want to force a 32 bit build on a 64 bit multilib OS.
-#CC= gcc -m32
+#CC= $(DEFAULT_CC) -m32
 #
 #
 # Since the assembler part does NOT maintain a frame pointer, it's pointless
 # Since the assembler part does NOT maintain a frame pointer, it's pointless
 # to slow down the C part by not omitting it. Debugging, tracebacks and
 # to slow down the C part by not omitting it. Debugging, tracebacks and
@@ -42,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer
 #
 #
 # Target-specific compiler options:
 # Target-specific compiler options:
 #
 #
-# x86 only: it's recommended to compile at least for i686. Better yet,
-# compile for an architecture that has SSE2, too (-msse -msse2).
-#
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
 # the binaries to a different machine you could also use: -march=native
 # the binaries to a different machine you could also use: -march=native
 #
 #
-CCOPT_x86= -march=i686
+CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
 CCOPT_x64=
 CCOPT_x64=
 CCOPT_arm=
 CCOPT_arm=
+CCOPT_arm64=
 CCOPT_ppc=
 CCOPT_ppc=
-CCOPT_ppcspe=
 CCOPT_mips=
 CCOPT_mips=
 #
 #
 CCDEBUG=
 CCDEBUG=
@@ -122,8 +121,10 @@ XCFLAGS=
 #
 #
 # Use the system provided memory allocator (realloc) instead of the
 # Use the system provided memory allocator (realloc) instead of the
 # bundled memory allocator. This is slower, but sometimes helpful for
 # bundled memory allocator. This is slower, but sometimes helpful for
-# debugging. It's helpful for Valgrind's memcheck tool, too. This option
-# cannot be enabled on x64, since the built-in allocator is mandatory.
+# debugging. This option cannot be enabled on x64, since realloc usually
+# doesn't return addresses in the right address range.
+# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
+# the only way to get useful results from it for all other architectures.
 #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
 #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
 #
 #
 # This define is required to run LuaJIT under Valgrind. The Valgrind
 # This define is required to run LuaJIT under Valgrind. The Valgrind
@@ -148,6 +149,29 @@ XCFLAGS=
 # You probably don't need to change anything below this line!
 # You probably don't need to change anything below this line!
 ##############################################################################
 ##############################################################################
 
 
+##############################################################################
+# Host system detection.
+##############################################################################
+
+ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
+  HOST_SYS= Windows
+  HOST_RM= del
+else
+  HOST_SYS:= $(shell uname -s)
+  ifneq (,$(findstring MINGW,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= mingw
+  endif
+  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
+    HOST_SYS= Windows
+    HOST_MSYS= cygwin
+  endif
+  # Use Clang for OSX host.
+  ifeq (Darwin,$(HOST_SYS))
+    DEFAULT_CC= clang
+  endif
+endif
+
 ##############################################################################
 ##############################################################################
 # Flags and options for host and target.
 # Flags and options for host and target.
 ##############################################################################
 ##############################################################################
@@ -163,7 +187,8 @@ XCFLAGS=
 #   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
 #   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
 #   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 #   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 
 
-CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 
 
 HOST_CC= $(CC)
 HOST_CC= $(CC)
@@ -188,9 +213,10 @@ TARGET_LD= $(CROSS)$(CC)
 TARGET_AR= $(CROSS)ar rcus
 TARGET_AR= $(CROSS)ar rcus
 TARGET_STRIP= $(CROSS)strip
 TARGET_STRIP= $(CROSS)strip
 
 
-TARGET_SONAME= libluajit.so
+TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
+TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
 TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
 TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
-TARGET_DYLIBPATH= $(or $(PREFIX),/usr/local)/lib/$(TARGET_DYLIBNAME)
+TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
 TARGET_DLLNAME= lua$(NODOTABIVER).dll
 TARGET_DLLNAME= lua$(NODOTABIVER).dll
 TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
 TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
 TARGET_DYNXLDOPTS=
 TARGET_DYNXLDOPTS=
@@ -201,6 +227,7 @@ TARGET_XLDFLAGS=
 TARGET_XLIBS= -lm
 TARGET_XLIBS= -lm
 TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
 TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -215,12 +242,17 @@ else
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
   TARGET_LJARCH= arm
   TARGET_LJARCH= arm
 else
 else
+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= arm64
+else
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
+  else
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
+  endif
   TARGET_LJARCH= ppc
   TARGET_LJARCH= ppc
 else
 else
-ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
-  TARGET_LJARCH= ppcspe
-else
 ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
     TARGET_ARCH= -D__MIPSEL__=1
     TARGET_ARCH= -D__MIPSEL__=1
@@ -239,9 +271,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
   TARGET_SYS= PS3
   TARGET_SYS= PS3
   TARGET_ARCH+= -D__CELLOS_LV2__
   TARGET_ARCH+= -D__CELLOS_LV2__
   TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
   TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
-endif
-ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+  TARGET_XLIBS+= -lpthread
 endif
 endif
 
 
 TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
 TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -249,45 +279,38 @@ TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH))
 
 
 ifneq (,$(PREFIX))
 ifneq (,$(PREFIX))
 ifneq (/usr/local,$(PREFIX))
 ifneq (/usr/local,$(PREFIX))
-  TARGET_XCFLAGS+= -DLUA_XROOT=\"$(PREFIX)/\"
+  TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\"
   ifneq (/usr,$(PREFIX))
   ifneq (/usr,$(PREFIX))
-    TARGET_DYNXLDOPTS= -Wl,-rpath,$(PREFIX)/lib
+    TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH)
   endif
   endif
 endif
 endif
 endif
 endif
+ifneq (,$(MULTILIB))
+  TARGET_XCFLAGS+= -DLUA_MULTILIB=\"$(MULTILIB)\"
+endif
+ifneq (,$(LMULTILIB))
+  TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
+endif
 
 
 ##############################################################################
 ##############################################################################
-# System detection.
+# Target system detection.
 ##############################################################################
 ##############################################################################
 
 
-ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
-  HOST_SYS= Windows
-  HOST_RM= del
-else
-  HOST_SYS:= $(shell uname -s)
-  ifneq (,$(findstring MINGW,$(HOST_SYS)))
-    HOST_SYS= Windows
-    HOST_MSYS= mingw
-  endif
-  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
-    HOST_SYS= Windows
-    HOST_MSYS= cygwin
-  endif
-endif
-
 TARGET_SYS?= $(HOST_SYS)
 TARGET_SYS?= $(HOST_SYS)
 ifeq (Windows,$(TARGET_SYS))
 ifeq (Windows,$(TARGET_SYS))
   TARGET_STRIP+= --strip-unneeded
   TARGET_STRIP+= --strip-unneeded
   TARGET_XSHLDFLAGS= -shared
   TARGET_XSHLDFLAGS= -shared
   TARGET_DYNXLDOPTS=
   TARGET_DYNXLDOPTS=
 else
 else
+ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
+  TARGET_XCFLAGS+= -fno-stack-protector
+endif
 ifeq (Darwin,$(TARGET_SYS))
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
     export MACOSX_DEPLOYMENT_TARGET=10.4
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   endif
   TARGET_STRIP+= -x
   TARGET_STRIP+= -x
   TARGET_AR+= 2>/dev/null
   TARGET_AR+= 2>/dev/null
-  TARGET_XCFLAGS+= -fno-stack-protector
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -299,14 +322,13 @@ else
 ifeq (iOS,$(TARGET_SYS))
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
   TARGET_STRIP+= -x
   TARGET_AR+= 2>/dev/null
   TARGET_AR+= 2>/dev/null
-  TARGET_XCFLAGS+= -fno-stack-protector
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-else
-  ifneq (,$(findstring stack-protector,$(shell $(TARGET_CC) -dumpspecs)))
-    TARGET_XCFLAGS+= -fno-stack-protector
+  ifeq (arm64,$(TARGET_LJARCH))
+    TARGET_XCFLAGS+= -fno-omit-frame-pointer
   endif
   endif
+else
   ifneq (SunOS,$(TARGET_SYS))
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
       TARGET_XLDFLAGS+= -Wl,-E
       TARGET_XLDFLAGS+= -Wl,-E
@@ -390,17 +412,18 @@ ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH)))
 else
 else
   TARGET_ARCH+= -DLJ_ABI_SOFTFP=1
   TARGET_ARCH+= -DLJ_ABI_SOFTFP=1
 endif
 endif
+ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D NO_UNWIND
+  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN
   DASM_AFLAGS+= -D WIN
 endif
 endif
-ifeq (x86,$(TARGET_LJARCH))
-  ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
-    DASM_AFLAGS+= -D SSE
-  endif
-else
 ifeq (x64,$(TARGET_LJARCH))
 ifeq (x64,$(TARGET_LJARCH))
-  DASM_ARCH= x86
+  ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
+    DASM_ARCH= x86
+  endif
 else
 else
 ifeq (arm,$(TARGET_LJARCH))
 ifeq (arm,$(TARGET_LJARCH))
   ifeq (iOS,$(TARGET_SYS))
   ifeq (iOS,$(TARGET_SYS))
@@ -414,13 +437,15 @@ ifeq (ppc,$(TARGET_LJARCH))
   ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
   ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D ROUND
     DASM_AFLAGS+= -D ROUND
   endif
   endif
-  ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D GPR64
     DASM_AFLAGS+= -D GPR64
   endif
   endif
   ifeq (PS3,$(TARGET_SYS))
   ifeq (PS3,$(TARGET_SYS))
     DASM_AFLAGS+= -D PPE -D TOC
     DASM_AFLAGS+= -D PPE -D TOC
   endif
   endif
-endif
+  ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+    DASM_ARCH= ppc64
+  endif
 endif
 endif
 endif
 endif
 endif
 endif
@@ -436,7 +461,7 @@ BUILDVM_X= $(BUILDVM_T)
 HOST_O= $(MINILUA_O) $(BUILDVM_O)
 HOST_O= $(MINILUA_O) $(BUILDVM_O)
 HOST_T= $(MINILUA_T) $(BUILDVM_T)
 HOST_T= $(MINILUA_T) $(BUILDVM_T)
 
 
-LJVM_S= lj_vm.s
+LJVM_S= lj_vm.S
 LJVM_O= lj_vm.o
 LJVM_O= lj_vm.o
 LJVM_BOUT= $(LJVM_S)
 LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
 LJVM_MODE= elfasm
@@ -445,10 +470,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
 	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
 	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 LJLIB_C= $(LJLIB_O:.o=.c)
 
 
-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
+LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_strfmt.o lj_api.o lj_profile.o \
+	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -487,7 +513,8 @@ TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
 
 
 ifeq (Windows,$(TARGET_SYS))
 ifeq (Windows,$(TARGET_SYS))
   TARGET_DYNCC= $(STATIC_CC)
   TARGET_DYNCC= $(STATIC_CC)
-  LJVM_MODE= coffasm
+  LJVM_MODE= peobj
+  LJVM_BOUT= $(LJVM_O)
   LUAJIT_T= luajit.exe
   LUAJIT_T= luajit.exe
   ifeq (cygwin,$(HOST_MSYS))
   ifeq (cygwin,$(HOST_MSYS))
     LUAJIT_SO= cyg$(TARGET_DLLNAME)
     LUAJIT_SO= cyg$(TARGET_DLLNAME)
@@ -567,6 +594,10 @@ amalg:
 clean:
 clean:
 	$(HOST_RM) $(ALL_RM)
 	$(HOST_RM) $(ALL_RM)
 
 
+libbc:
+	./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
+	$(MAKE) all
+
 depend:
 depend:
 	@for file in $(ALL_HDRGEN); do \
 	@for file in $(ALL_HDRGEN); do \
 	  test -f $$file || touch $$file; \
 	  test -f $$file || touch $$file; \
@@ -581,7 +612,7 @@ depend:
 	  test -s $$file || $(HOST_RM) $$file; \
 	  test -s $$file || $(HOST_RM) $$file; \
 	  done
 	  done
 
 
-.PHONY: default all amalg clean depend
+.PHONY: default all amalg clean libbc depend
 
 
 ##############################################################################
 ##############################################################################
 # Rules for generated files.
 # Rules for generated files.
@@ -638,10 +669,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
 	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
 	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
 	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
 	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
 
 
-%.o: %.s
+%.o: %.S
 	$(E) "ASM       $@"
 	$(E) "ASM       $@"
-	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
-	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+	$(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
+	$(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
 
 
 $(LUAJIT_O):
 $(LUAJIT_O):
 	$(E) "CC        $@"
 	$(E) "CC        $@"

+ 111 - 93
jni/LuaJIT-2.0.1/src/Makefile.dep → jni/LuaJIT-2.1/src/Makefile.dep

@@ -5,43 +5,47 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
  lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
  lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
  lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_lib.h lj_libdef.h
+ lj_strfmt.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
  lj_libdef.h
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
- lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+ lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
+ lj_libdef.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h lj_ffdef.h \
- lj_lib.h lj_libdef.h
-lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
- lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
- lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
- lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
+ lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
+ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
 lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
  lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
 lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
 lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
 lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
  lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
 lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
- lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \
- lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
+ lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
- lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
  lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
  lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
  lj_bcdef.h
 lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \
- lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
+ lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
+ lj_strfmt.h
 lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \
- lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+ lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
+ lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_carith.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
+ lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
+ lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h
  lj_traceerr.h
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,107 +78,116 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_ccallback.h
  lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
 lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_clib.h
+ lj_cdata.h lj_clib.h lj_strfmt.h
 lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
- lj_bc.h lj_vm.h lj_char.h lj_strscan.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
+ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
+ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
- lj_crecord.h
+ lj_crecord.h lj_strfmt.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
+ lj_ccallback.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
- lj_bc.h lj_jit.h lj_ir.h
+ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
- lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
- lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_vm.h luajit.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
+ lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
+ lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h
+ lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_recdef.h
+ lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
  lj_traceerr.h lj_vm.h
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \
- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \
- lj_ir.h lj_dispatch.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
+ lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_vm.h lj_strscan.h lj_lib.h
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
- lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
+ lj_bcdump.h lj_lib.h
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
- lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
- lj_traceerr.h lj_vm.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_vm.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h
  lj_ir.h lj_jit.h lj_iropt.h
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
- lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
- lj_strscan.h lj_folddef.h
+ lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
  lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strscan.h
  lj_traceerr.h lj_vm.h lj_strscan.h
 lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_vm.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \
- lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
+lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
- lj_ffrecord.h lj_snap.h lj_vm.h
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
  lj_target_*.h lj_ctype.h lj_cdata.h
  lj_target_*.h lj_ctype.h lj_cdata.h
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h
+lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
  lj_char.h lj_strscan.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,21 +205,22 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_vm.h
  lj_ir.h lj_vm.h
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
- lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
- lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
- lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
- lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
- lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
- luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
- lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
- lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
- lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
- lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
+ lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
+ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
+ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+ lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
  lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
  lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
  lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
  lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
  lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
  lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
@@ -220,7 +237,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
 host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
  luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
 host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
- lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
+ host/buildvm_libbc.h
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
 host/minilua.o: host/minilua.c
 host/minilua.o: host/minilua.c

+ 0 - 0
jni/LuaJIT-2.0.1/src/host/.gitignore → jni/LuaJIT-2.1/src/host/.gitignore


+ 0 - 0
jni/LuaJIT-2.0.1/src/host/README → jni/LuaJIT-2.1/src/host/README


+ 18 - 10
jni/LuaJIT-2.0.1/src/host/buildvm.c → jni/LuaJIT-2.1/src/host/buildvm.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder.
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** This is a tool to build the hand-tuned assembler code required for
 ** This is a tool to build the hand-tuned assembler code required for
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
 #include "../dynasm/dasm_x86.h"
 #include "../dynasm/dasm_x86.h"
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 #include "../dynasm/dasm_arm.h"
 #include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #include "../dynasm/dasm_ppc.h"
 #include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #include "../dynasm/dasm_mips.h"
 #include "../dynasm/dasm_mips.h"
 #else
 #else
@@ -108,10 +108,16 @@ static const char *sym_decorate(BuildCtx *ctx,
   sprintf(name, "%s%s%s", symprefix, prefix, suffix);
   sprintf(name, "%s%s%s", symprefix, prefix, suffix);
   p = strchr(name, '@');
   p = strchr(name, '@');
   if (p) {
   if (p) {
+#if LJ_TARGET_X86ORX64
     if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
     if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
       name[0] = '@';
       name[0] = '@';
     else
     else
       *p = '\0';
       *p = '\0';
+#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
+    /* Keep @plt etc. */
+#else
+    *p = '\0';
+#endif
   }
   }
   p = (char *)malloc(strlen(name)+1);  /* MSVC doesn't like strdup. */
   p = (char *)malloc(strlen(name)+1);  /* MSVC doesn't like strdup. */
   strcpy(p, name);
   strcpy(p, name);
@@ -173,6 +179,7 @@ static int build_code(BuildCtx *ctx)
   ctx->nreloc = 0;
   ctx->nreloc = 0;
 
 
   ctx->globnames = globnames;
   ctx->globnames = globnames;
+  ctx->extnames = extnames;
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->nrelocsym = 0;
   ctx->nrelocsym = 0;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -314,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
   char buf[80];
   char buf[80];
   int i;
   int i;
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
-  fprintf(ctx->fp, "module(...)\n\n");
+  fprintf(ctx->fp, "return {\n\n");
 
 
   fprintf(ctx->fp, "bcnames = \"");
   fprintf(ctx->fp, "bcnames = \"");
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
 
   fprintf(ctx->fp, "irnames = \"");
   fprintf(ctx->fp, "irnames = \"");
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
 
   fprintf(ctx->fp, "irfpm = { [0]=");
   fprintf(ctx->fp, "irfpm = { [0]=");
   for (i = 0; irfpm_names[i]; i++)
   for (i = 0; irfpm_names[i]; i++)
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "irfield = { [0]=");
   fprintf(ctx->fp, "irfield = { [0]=");
   for (i = 0; irfield_names[i]; i++) {
   for (i = 0; irfield_names[i]; i++) {
@@ -337,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
     if (p) *p = '.';
     if (p) *p = '.';
     fprintf(ctx->fp, "\"%s\", ", buf);
     fprintf(ctx->fp, "\"%s\", ", buf);
   }
   }
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "ircall = {\n[0]=");
   fprintf(ctx->fp, "ircall = {\n[0]=");
   for (i = 0; ircall_names[i]; i++)
   for (i = 0; ircall_names[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   for (i = 0; trace_errors[i]; i++)
   for (i = 0; trace_errors[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 }
 }
 
 
 /* -- Argument parsing ---------------------------------------------------- */
 /* -- Argument parsing ---------------------------------------------------- */
@@ -484,6 +491,7 @@ int main(int argc, char **argv)
   case BUILD_vmdef:
   case BUILD_vmdef:
     emit_vmdef(ctx);
     emit_vmdef(ctx);
     emit_lib(ctx);
     emit_lib(ctx);
+    fprintf(ctx->fp, "}\n\n");
     break;
     break;
   case BUILD_ffdef:
   case BUILD_ffdef:
   case BUILD_libdef:
   case BUILD_libdef:

+ 2 - 1
jni/LuaJIT-2.0.1/src/host/buildvm.h → jni/LuaJIT-2.1/src/host/buildvm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder.
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _BUILDVM_H
 #ifndef _BUILDVM_H
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
   const char *beginsym;
   const char *beginsym;
   /* Strings generated by DynASM. */
   /* Strings generated by DynASM. */
   const char *const *globnames;
   const char *const *globnames;
+  const char *const *extnames;
   const char *dasm_ident;
   const char *dasm_ident;
   const char *dasm_arch;
   const char *dasm_arch;
   /* Relocations. */
   /* Relocations. */

+ 44 - 12
jni/LuaJIT-2.0.1/src/host/buildvm_asm.c → jni/LuaJIT-2.1/src/host/buildvm_asm.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder: Assembler source code emitter.
 ** LuaJIT VM builder: Assembler source code emitter.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "buildvm.h"
 #include "buildvm.h"
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
 };
 };
 
 
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
 				const char *sym)
 				const char *sym)
 {
 {
   const char *opname = NULL;
   const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
     exit(1);
     exit(1);
   }
   }
   emit_asm_bytes(ctx, cp, n);
   emit_asm_bytes(ctx, cp, n);
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    if (ctx->mode == BUILD_elfasm) {
+      if (LJ_32)
+	fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+      fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+      if (LJ_32)
+	fprintf(ctx->fp, "#endif\n");
+      return;
+    } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+      fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+      return;
+    }
+  }
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
 }
 }
 #else
 #else
@@ -100,14 +114,23 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\tblx %s\n", sym);
     fprintf(ctx->fp, "\tblx %s\n", sym);
   } else if ((ins & 0x0e000000u) == 0x0a000000u) {
   } else if ((ins & 0x0e000000u) == 0x0a000000u) {
     fprintf(ctx->fp, "\t%s%.2s %s\n", (ins & 0x01000000u) ? "bl" : "b",
     fprintf(ctx->fp, "\t%s%.2s %s\n", (ins & 0x01000000u) ? "bl" : "b",
-	    "eqnecsccmiplvsvchilsgeltgtle" + 2*(ins >> 28), sym);
+	    &"eqnecsccmiplvsvchilsgeltgtle"[2*(ins >> 28)], sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_ARM64
+  if ((ins >> 26) == 0x25u) {
+    fprintf(ctx->fp, "\tbl %s\n", sym);
   } else {
   } else {
     fprintf(stderr,
     fprintf(stderr,
 	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
 	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
 	    ins, sym);
 	    ins, sym);
     exit(1);
     exit(1);
   }
   }
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_PPC
 #if LJ_TARGET_PS3
 #if LJ_TARGET_PS3
 #define TOCPREFIX "."
 #define TOCPREFIX "."
 #else
 #else
@@ -117,6 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
   } else if ((ins >> 26) == 18) {
   } else if ((ins >> 26) == 18) {
+#if LJ_ARCH_PPC64
+    const char *suffix = strchr(sym, '@');
+    if (suffix && suffix[1] == 'h') {
+      fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+    } else if (suffix && suffix[1] == 'l') {
+      fprintf(ctx->fp, "\tld 12, %s\n", sym);
+    } else
+#endif
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
   } else {
   } else {
     fprintf(stderr,
     fprintf(stderr,
@@ -214,6 +245,9 @@ void emit_asm(BuildCtx *ctx)
   int i, rel;
   int i, rel;
 
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC64
+  fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
   fprintf(ctx->fp, "\t.text\n");
   fprintf(ctx->fp, "\t.text\n");
   emit_asm_align(ctx, 4);
   emit_asm_align(ctx, 4);
 
 
@@ -254,8 +288,9 @@ void emit_asm(BuildCtx *ctx)
       BuildReloc *r = &ctx->reloc[rel];
       BuildReloc *r = &ctx->reloc[rel];
       int n = r->ofs - ofs;
       int n = r->ofs - ofs;
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
-      if (ctx->mode == BUILD_machasm && r->type != 0) {
-	emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      if (r->type != 0 &&
+	  (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+	emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
       } else {
       } else {
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -286,13 +321,10 @@ void emit_asm(BuildCtx *ctx)
   fprintf(ctx->fp, "\n");
   fprintf(ctx->fp, "\n");
   switch (ctx->mode) {
   switch (ctx->mode) {
   case BUILD_elfasm:
   case BUILD_elfasm:
-#if !LJ_TARGET_PS3
+#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
 #endif
 #endif
-#if LJ_TARGET_PPCSPE
-    /* Soft-float ABI + SPE. */
-    fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3
     /* Hard-float ABI. */
     /* Hard-float ABI. */
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
 #endif
 #endif

+ 1 - 1
jni/LuaJIT-2.0.1/src/host/buildvm_fold.c → jni/LuaJIT-2.1/src/host/buildvm_fold.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder: IR folding hash table generator.
 ** LuaJIT VM builder: IR folding hash table generator.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "buildvm.h"
 #include "buildvm.h"

+ 61 - 2
jni/LuaJIT-2.0.1/src/host/buildvm_lib.c → jni/LuaJIT-2.1/src/host/buildvm_lib.c

@@ -1,11 +1,13 @@
 /*
 /*
 ** LuaJIT VM builder: library definition compiler.
 ** LuaJIT VM builder: library definition compiler.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "buildvm.h"
 #include "buildvm.h"
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_bc.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
+#include "buildvm_libbc.h"
 
 
 /* Context for library definitions. */
 /* Context for library definitions. */
 static uint8_t obuf[8192];
 static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
   regfunc = REGFUNC_OK;
   regfunc = REGFUNC_OK;
 }
 }
 
 
+/* Decode one ULEB128-encoded 32 bit value starting at p.
+** Stores the decoded value in *vv and returns a pointer to the byte
+** following the last byte of the encoding.
+*/
+static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
+{
+  uint32_t v = *p++;
+  if (v >= 0x80) {  /* High bit set: more 7 bit groups follow. */
+    int sh = 0;
+    v &= 0x7f;
+    do {
+      sh += 7;
+      /* Shift as unsigned and never by >= 32: both are undefined for int. */
+      if (sh < 32) v |= ((uint32_t)(*p & 0x7f) << sh);
+    } while (*p++ >= 0x80);
+  }
+  *vv = v;
+  return p;
+}
+
+/* Fix up the bytecode instructions of the function dump at p, in place.
+** Each 4 byte instruction is read with the operand layout selected by
+** libbc_endian and stored back with the layout chosen by LJ_ENDIAN_SELECT
+** (presumably the target byte order -- confirm against its definition).
+** If LJ_DUALNUM is off, a BC_ISTYPE whose rc operand equals ~LJ_TNUMX+1 is
+** replaced by BC_ISNUM with rc incremented by one.
+*/
+static void libdef_fixupbc(uint8_t *p)
+{
+  /* Byte positions of the operands in the stored (source) layout. */
+  const int src_op = libbc_endian ? 3 : 0;
+  const int src_ra = libbc_endian ? 2 : 1;
+  const int src_rc = libbc_endian ? 1 : 2;
+  const int src_rb = libbc_endian ? 0 : 3;
+  uint32_t skipped, nins;
+  p += 4;  /* Skip the four leading bytes of the dump. */
+  p = libdef_uleb128(p, &skipped);  /* Two ULEB128 fields are not needed. */
+  p = libdef_uleb128(p, &skipped);
+  p = libdef_uleb128(p, &nins);  /* Number of instructions that follow. */
+  while (nins-- > 0) {
+    /* Load all four bytes before storing: source and target slots overlap. */
+    uint8_t op = p[src_op];
+    uint8_t ra = p[src_ra];
+    uint8_t rc = p[src_rc];
+    uint8_t rb = p[src_rb];
+    if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
+      op = BC_ISNUM;
+      rc++;
+    }
+    p[LJ_ENDIAN_SELECT(0, 3)] = op;
+    p[LJ_ENDIAN_SELECT(1, 2)] = ra;
+    p[LJ_ENDIAN_SELECT(2, 1)] = rc;
+    p[LJ_ENDIAN_SELECT(3, 0)] = rb;
+    p += 4;
+  }
+}
+
+/* Handler for LUA(name): emit a library function implemented as bytecode.
+** Looks up name p in libbc_map and appends a LIBINIT_LUA tag, the name and
+** the fixed-up bytecode of the function to the output buffer.
+** Does nothing unless the build mode is BUILD_libdef. Exits with an error
+** if there is no bytecode for the name or if it does not fit into obuf.
+*/
+static void libdef_lua(BuildCtx *ctx, char *p, int arg)
+{
+  int i;
+  UNUSED(arg);
+  if (ctx->mode != BUILD_libdef)
+    return;
+  for (i = 0; libbc_map[i].name != NULL; i++) {
+    if (!strcmp(libbc_map[i].name, p)) {
+      int ofs = libbc_map[i].ofs;
+      /* The offset of the next map entry marks the end of this function. */
+      int len = libbc_map[i+1].ofs - ofs;
+      obuf[2]++;  /* Bump hash table size. */
+      *optr++ = LIBINIT_LUA;
+      libdef_name(p, 0);
+      /* Check the space left at the current output position before copying. */
+      if (len < 0 || (size_t)len > (size_t)(obuf + sizeof(obuf) - optr)) {
+	fprintf(stderr, "Error: output buffer overflow\n");
+	exit(1);
+      }
+      memcpy(optr, libbc_code + ofs, (size_t)len);
+      libdef_fixupbc(optr);
+      optr += len;
+      return;
+    }
+  }
+  fprintf(stderr, "Error: missing libbc definition for %s\n", p);
+  exit(1);
+}
+
 static uint32_t find_rec(char *name)
 static uint32_t find_rec(char *name)
 {
 {
   char *p = (char *)obuf;
   char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
+  { "LUA(",	")",		libdef_lua,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "SET(",	")",		libdef_set,		0 },
   { "SET(",	")",		libdef_set,		0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       ffasmfunc);
       ffasmfunc);
   } else if (ctx->mode == BUILD_vmdef) {
   } else if (ctx->mode == BUILD_vmdef) {
-    fprintf(ctx->fp, "}\n\n");
+    fprintf(ctx->fp, "},\n\n");
   } else if (ctx->mode == BUILD_bcdef) {
   } else if (ctx->mode == BUILD_bcdef) {
     int i;
     int i;
     fprintf(ctx->fp, "\n};\n\n");
     fprintf(ctx->fp, "\n};\n\n");

+ 45 - 0
jni/LuaJIT-2.1/src/host/buildvm_libbc.h

@@ -0,0 +1,45 @@
+/* This is a generated file. DO NOT EDIT! */
+
+static const int libbc_endian = 0;
+
+static const uint8_t libbc_code[] = {
+#if LJ_FR2
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#else
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
+0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#endif
+};
+
+static const struct { const char *name; int ofs; } libbc_map[] = {
+{"math_deg",0},
+{"math_rad",25},
+{"string_len",50},
+{"table_foreachi",69},
+{"table_foreach",136},
+{"table_getn",207},
+{"table_remove",226},
+{NULL,355}
+};
+

+ 1 - 1
jni/LuaJIT-2.0.1/src/host/buildvm_peobj.c → jni/LuaJIT-2.1/src/host/buildvm_peobj.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder: PE object emitter.
 ** LuaJIT VM builder: PE object emitter.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Only used for building on Windows, since we cannot assume the presence
 ** Only used for building on Windows, since we cannot assume the presence
 ** of a suitable assembler. The host and target byte order must match.
 ** of a suitable assembler. The host and target byte order must match.

+ 197 - 0
jni/LuaJIT-2.1/src/host/genlibbc.lua

@@ -0,0 +1,197 @@
+----------------------------------------------------------------------------
+-- Lua script to dump the bytecode of the library functions written in Lua.
+-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local ffi = require("ffi")
+local bit = require("bit")
+local vmdef = require("jit.vmdef")
+local bcnames = vmdef.bcnames
+
+local format = string.format
+
+local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
+
+-- Print the command line synopsis to stderr and exit with status 1.
+-- The program name is taken from arg[0] when available.
+local function usage(arg)
+  local prog = "genlibbc"
+  if arg and arg[0] then prog = arg[0] end
+  io.stderr:write("Usage: ", prog, " [-o buildvm_libbc.h] lib_*.c\n")
+  os.exit(1)
+end
+
+-- Parse the command line and return the output file name.
+-- Without a leading "-o" the result is "-". With "-o name" the result is
+-- name and both entries are removed from arg, so only the remaining
+-- arguments are left in it. Calls usage() when arguments are missing.
+local function parse_arg(arg)
+  if not arg or not arg[1] then
+    usage(arg)
+  end
+  if arg[1] ~= "-o" then
+    return "-"
+  end
+  local outfile = arg[2]
+  if not outfile then usage(arg) end
+  -- Drop "-o" and the file name that follows it.
+  for _ = 1, 2 do table.remove(arg, 1) end
+  return outfile
+end
+
+-- Return the contents of all files listed in names, concatenated in order.
+-- Raises an error if one of the files cannot be opened.
+local function read_files(names)
+  local all = ""
+  for _,path in ipairs(names) do
+    local fp = assert(io.open(path))
+    local text = fp:read("*a")
+    all = all .. text
+    fp:close()
+  end
+  return all
+end
+
+-- Turn the body of a library function definition into a loadable Lua chunk.
+-- Every CHECK_<type>(var) becomes the assignment "var=<n>" with a unique
+-- marker number n (counting up from -29999), recorded as
+-- fixup[n] = { "CHECK", type }. Every PAIRS(var) becomes "nil, var, 0" and
+-- sets fixup.PAIRS. Returns the chunk source ("return " .. code) and fixup.
+local function transform_lua(code)
+  local fixup = {}
+  local marker = -30000
+
+  local function on_check(tp, var)
+    marker = marker + 1
+    fixup[marker] = { "CHECK", tp }
+    return format("%s=%d", var, marker)
+  end
+
+  local function on_pairs(var)
+    fixup.PAIRS = true
+    return format("nil, %s, 0", var)
+  end
+
+  local checked = string.gsub(code, "CHECK_(%w*)%((.-)%)", on_check)
+  local body = string.gsub(checked, "PAIRS%((.-)%)", on_pairs)
+  return "return "..body, fixup
+end
+
+-- Decode one ULEB128 value from the byte pointer p.
+-- Returns the pointer past the encoding, followed by the decoded value.
+local function read_uleb128(p)
+  local value = p[0]
+  p = p + 1
+  if value < 128 then
+    return p, value
+  end
+  -- High bit set: strip it and accumulate the following 7 bit groups.
+  value = value - 128
+  local shift = 7
+  while true do
+    local byte = p[0]
+    value = value + bit.lshift(bit.band(byte, 127), shift)
+    shift = shift + 7
+    p = p + 1
+    if byte < 128 then break end
+  end
+  return p, value
+end
+
+-- Type name used in CHECK_<type>() -> operand value stored for the check.
+-- ORDER LJ_T
+local name2itype = {
+  str = 5, func = 9, tab = 12, int = 14, num = 15
+}
+
+-- Opcode name -> opcode number. bcnames holds one name per 6 characters,
+-- padded with spaces; the position of a name is its opcode number.
+local BC = {}
+do
+  local opcode = 0
+  for pos = 1, #bcnames-5, 6 do
+    local name = string.gsub(string.sub(bcnames, pos, pos+5), " ", "")
+    BC[name] = opcode
+    opcode = opcode + 1
+  end
+end
+
+-- Byte offsets of the instruction fields, depending on the dump byte order.
+local xop, xra, xrc, xrb
+if isbe then
+  xop, xra, xrc, xrb = 3, 2, 1, 0
+else
+  xop, xra, xrc, xrb = 0, 1, 2, 3
+end
+
+-- Patch the bytecode dump of a single function according to fixup and
+-- return the function part of the dump (without the leading dump header)
+-- as a string.
+--  * KSHORT loading a marker number recorded in fixup becomes ISNUM (for
+--    type "num") or ISTYPE with the type taken from name2itype.
+--  * TGETV/TSETV whose rb operand was checked as "tab" become TGETR/TSETR.
+--  * ITERC becomes ITERN if fixup.PAIRS is set.
+local function fixup_dump(dump, fixup)
+  local bytes = ffi.new("uint8_t[?]", #dump+1, dump)
+  local size, ninstr
+  -- Skip the first 5 bytes of the dump, then read the function size.
+  local p
+  p, size = read_uleb128(bytes+5)
+  local fstart = p
+  -- Skip 4 bytes and two ULEB128 fields; the third is the instruction count.
+  p = read_uleb128(p + 4)
+  p = read_uleb128(p)
+  p, ninstr = read_uleb128(p)
+  local rawtab = {}  -- Slots that hold a value checked as "tab".
+  for _ = 1, ninstr do
+    local op = p[xop]
+    if op == BC.KSHORT then
+      -- Rebuild the 16 bit operand and sign-extend it.
+      local rd = bit.arshift(bit.lshift(p[xrc] + 256*p[xrb], 16), 16)
+      local f = fixup[rd]
+      if f and f[1] ~= "CHECK" then
+        error("unhandled fixup type: "..f[1])
+      elseif f then
+        local tp = f[2]
+        if tp == "tab" then rawtab[p[xra]] = true end
+        local newop = tp == "num" and BC.ISNUM or BC.ISTYPE
+        p[xop] = newop
+        p[xrb] = 0
+        p[xrc] = name2itype[tp]
+      end
+    elseif op == BC.TGETV then
+      if rawtab[p[xrb]] then p[xop] = BC.TGETR end
+    elseif op == BC.TSETV then
+      if rawtab[p[xrb]] then p[xop] = BC.TSETR end
+    elseif op == BC.ITERC then
+      if fixup.PAIRS then p[xop] = BC.ITERN end
+    end
+    p = p + 4
+  end
+  return ffi.string(fstart, size)
+end
+
+-- Collect all LJLIB_LUA(name) /* code */ definitions found in src.
+-- Returns a table that maps each name to its patched bytecode dump and
+-- also lists the names in order of appearance in its array part.
+local function find_defs(src)
+  local defs = {}
+  local pattern = "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/"
+  for name, code in string.gmatch(src, pattern) do
+    local tcode, fixup = transform_lua(code)
+    -- Compile the chunk with an empty environment; running it yields the
+    -- function defined by the code.
+    local env = {}
+    local chunk = assert(load(tcode, "", nil, env))
+    local func = chunk()
+    local dump = string.dump(func, true)
+    defs[name] = fixup_dump(dump, fixup)
+    defs[#defs+1] = name
+  end
+  return defs
+end
+
+-- Generate the text of the C header from the definitions in defs.
+-- The header contains libbc_endian, the libbc_code byte array with all
+-- dumps in order (plus a final 0) and the libbc_map table with the start
+-- offset of every function, ended by a NULL entry holding the total size.
+local function gen_header(defs)
+  local out = {}
+  local function emit(x) out[#out+1] = x end
+
+  emit("/* This is a generated file. DO NOT EDIT! */\n\n")
+  emit("static const int libbc_endian = " .. (isbe and 1 or 0) .. ";\n\n")
+
+  -- All dumps back to back, in definition order.
+  local code = ""
+  for _,name in ipairs(defs) do
+    code = code .. defs[name]
+  end
+
+  emit("static const uint8_t libbc_code[] = {\n")
+  local col = 0  -- Characters written on the current line.
+  for i=1,#code do
+    local byte = string.byte(code, i)
+    emit(byte .. ",")
+    -- Width of the number plus the comma.
+    if byte < 10 then
+      col = col + 2
+    elseif byte < 100 then
+      col = col + 3
+    else
+      col = col + 4
+    end
+    if col >= 75 then
+      col = 0
+      emit("\n")
+    end
+  end
+  emit("0\n};\n\n")
+
+  emit("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
+  local ofs = 0
+  for _,name in ipairs(defs) do
+    emit('{"' .. name .. '",' .. ofs .. '},\n')
+    ofs = ofs + #defs[name]
+  end
+  emit("{NULL," .. ofs .. "}\n};\n\n")
+  return table.concat(out)
+end
+
+-- Write data to the file called name, or to standard output if name is "-".
+-- An existing file that already holds exactly data is left untouched.
+-- Raises an error if writing fails.
+local function write_file(name, data)
+  if name == "-" then
+    assert(io.write(data))
+    assert(io.flush())
+    return
+  end
+  -- Compare with the current contents first and skip identical output.
+  local fp = io.open(name)
+  if fp then
+    local old = fp:read("*a")
+    fp:close()
+    if data == old then return end
+  end
+  fp = assert(io.open(name, "w"))
+  assert(fp:write(data))
+  assert(fp:close())
+end
+
+-- Script entry point. parse_arg() must run first, since it removes the
+-- "-o <file>" pair from arg and leaves only the input file names.
+local outfile = parse_arg(arg)
+write_file(outfile, gen_header(find_defs(read_files(arg))))
+

+ 2 - 1
jni/LuaJIT-2.0.1/src/host/genminilua.lua → jni/LuaJIT-2.1/src/host/genminilua.lua

@@ -2,7 +2,7 @@
 -- Lua script to generate a customized, minified version of Lua.
 -- Lua script to generate a customized, minified version of Lua.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 
 
@@ -210,6 +210,7 @@ typedef unsigned __int64 U64;
 #else
 #else
 typedef unsigned long long U64;
 typedef unsigned long long U64;
 #endif
 #endif
+int _CRT_glob = 0;
 ]]}, {}
 ]]}, {}
 
 
 local function preprocess(src)
 local function preprocess(src)

+ 1 - 0
jni/LuaJIT-2.0.1/src/host/minilua.c → jni/LuaJIT-2.1/src/host/minilua.c

@@ -27,6 +27,7 @@ typedef unsigned __int64 U64;
 #else
 #else
 typedef unsigned long long U64;
 typedef unsigned long long U64;
 #endif
 #endif
+int _CRT_glob = 0;
 #include <stddef.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <stdarg.h>
 #include <limits.h>
 #include <limits.h>

+ 0 - 0
jni/LuaJIT-2.0.1/src/jit/.gitignore → jni/LuaJIT-2.1/src/jit/.gitignore


+ 10 - 11
jni/LuaJIT-2.0.1/src/jit/bc.lua → jni/LuaJIT-2.1/src/jit/bc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT bytecode listing module.
 -- LuaJIT bytecode listing module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -41,7 +41,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local bit = require("bit")
 local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-line = bcline
-dump = bcdump
-targets = bctargets
-
-on = bcliston
-off = bclistoff
-start = bcliston -- For -j command line option.
+return {
+  line = bcline,
+  dump = bcdump,
+  targets = bctargets,
+  on = bcliston,
+  off = bclistoff,
+  start = bcliston -- For -j command line option.
+}
 
 

+ 16 - 14
jni/LuaJIT-2.0.1/src/jit/bcsave.lua → jni/LuaJIT-2.1/src/jit/bcsave.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT module to save/list bytecode.
 -- LuaJIT module to save/list bytecode.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -11,7 +11,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local bit = require("bit")
 local bit = require("bit")
 
 
 -- Symbol name prefix for LuaJIT bytecode.
 -- Symbol name prefix for LuaJIT bytecode.
@@ -63,13 +63,13 @@ local map_type = {
 }
 }
 
 
 local map_arch = {
 local map_arch = {
-  x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
+  x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
   mips = true, mipsel = true,
   mips = true, mipsel = true,
 }
 }
 
 
 local map_os = {
 local map_os = {
   linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
   linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
-  openbsd = true, solaris = true,
+  openbsd = true, dragonfly = true, solaris = true,
 }
 }
 
 
 local function checkarg(str, map, err)
 local function checkarg(str, map, err)
@@ -200,9 +200,9 @@ typedef struct {
 ]]
 ]]
   local symname = LJBC_PREFIX..ctx.modname
   local symname = LJBC_PREFIX..ctx.modname
   local is64, isbe = false, false
   local is64, isbe = false, false
-  if ctx.arch == "x64" then
+  if ctx.arch == "x64" or ctx.arch == "arm64" then
     is64 = true
     is64 = true
-  elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+  elseif ctx.arch == "ppc" or ctx.arch == "mips" then
     isbe = true
     isbe = true
   end
   end
 
 
@@ -237,7 +237,7 @@ typedef struct {
   hdr.eendian = isbe and 2 or 1
   hdr.eendian = isbe and 2 or 1
   hdr.eversion = 1
   hdr.eversion = 1
   hdr.type = f16(1)
   hdr.type = f16(1)
-  hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
     hdr.flags = 0x50001006
     hdr.flags = 0x50001006
   end
   end
@@ -477,13 +477,13 @@ typedef struct {
 } mach_obj_64;
 } mach_obj_64;
 typedef struct {
 typedef struct {
   mach_fat_header fat;
   mach_fat_header fat;
-  mach_fat_arch fat_arch[4];
+  mach_fat_arch fat_arch[2];
   struct {
   struct {
     mach_header hdr;
     mach_header hdr;
     mach_segment_command seg;
     mach_segment_command seg;
     mach_section sec;
     mach_section sec;
     mach_symtab_command sym;
     mach_symtab_command sym;
-  } arch[4];
+  } arch[2];
   mach_nlist sym_entry;
   mach_nlist sym_entry;
   uint8_t space[4096];
   uint8_t space[4096];
 } mach_fat_obj;
 } mach_fat_obj;
@@ -494,6 +494,8 @@ typedef struct {
     is64, align, mobj = true, 8, "mach_obj_64"
     is64, align, mobj = true, 8, "mach_obj_64"
   elseif ctx.arch == "arm" then
   elseif ctx.arch == "arm" then
     isfat, mobj = true, "mach_fat_obj"
     isfat, mobj = true, "mach_fat_obj"
+  elseif ctx.arch == "arm64" then
+    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
   else
   else
     check(ctx.arch == "x86", "unsupported architecture for OSX")
     check(ctx.arch == "x86", "unsupported architecture for OSX")
   end
   end
@@ -503,8 +505,8 @@ typedef struct {
   -- Create Mach-O object and fill in header.
   -- Create Mach-O object and fill in header.
   local o = ffi.new(mobj)
   local o = ffi.new(mobj)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
-  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
-  local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
+  local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
   if isfat then
   if isfat then
     o.fat.magic = be32(0xcafebabe)
     o.fat.magic = be32(0xcafebabe)
     o.fat.nfat_arch = be32(#cpusubtype)
     o.fat.nfat_arch = be32(#cpusubtype)
@@ -653,7 +655,7 @@ end
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-start = docmd -- Process -b command line option.
+return {
+  start = docmd -- Process -b command line option.
+}
 
 

+ 10 - 10
jni/LuaJIT-2.0.1/src/jit/dis_arm.lua → jni/LuaJIT-2.1/src/jit/dis_arm.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT ARM disassembler module.
 -- LuaJIT ARM disassembler module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 16 then return map_gpr[r] end
   if r < 16 then return map_gpr[r] end
   return "d"..(r-16)
   return "d"..(r-16)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
 

+ 16 - 16
jni/LuaJIT-2.0.1/src/jit/dis_mips.lua → jni/LuaJIT-2.1/src/jit/dis_mips.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS disassembler module.
 -- LuaJIT MIPS disassembler module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
   return ctx
   return ctx
 end
 end
 
 
-local function create_el_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create_el(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.get = get_le
   ctx.get = get_le
   return ctx
   return ctx
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
-local function disass_el_(code, addr, out)
-  create_el_(code, addr, out):disass()
+local function disass_el(code, addr, out)
+  create_el(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
   return "f"..(r-32)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-create_el = create_el_
-disass = disass_
-disass_el = disass_el_
-regname = regname_
+return {
+  create = create,
+  create_el = create_el,
+  disass = disass,
+  disass_el = disass_el,
+  regname = regname
+}
 
 

+ 7 - 10
jni/LuaJIT-2.0.1/src/jit/dis_mipsel.lua → jni/LuaJIT-2.1/src/jit/dis_mipsel.lua

@@ -1,20 +1,17 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT MIPSEL disassembler wrapper module.
 -- LuaJIT MIPSEL disassembler wrapper module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This module just exports the little-endian functions from the
 -- This module just exports the little-endian functions from the
 -- MIPS disassembler module. All the interesting stuff is there.
 -- MIPS disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
-local require = require
-
-module(...)
-
-local dis_mips = require(_PACKAGE.."dis_mips")
-
-create = dis_mips.create_el
-disass = dis_mips.disass_el
-regname = dis_mips.regname
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create_el,
+  disass = dis_mips.disass_el,
+  regname = dis_mips.regname
+}
 
 

+ 10 - 10
jni/LuaJIT-2.0.1/src/jit/dis_ppc.lua → jni/LuaJIT-2.1/src/jit/dis_ppc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT PPC disassembler module.
 -- LuaJIT PPC disassembler module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
   return "f"..(r-32)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
 

+ 7 - 10
jni/LuaJIT-2.0.1/src/jit/dis_x64.lua → jni/LuaJIT-2.1/src/jit/dis_x64.lua

@@ -1,20 +1,17 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT x64 disassembler wrapper module.
 -- LuaJIT x64 disassembler wrapper module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This module just exports the 64 bit functions from the combined
 -- This module just exports the 64 bit functions from the combined
 -- x86/x64 disassembler module. All the interesting stuff is there.
 -- x86/x64 disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
-local require = require
-
-module(...)
-
-local dis_x86 = require(_PACKAGE.."dis_x86")
-
-create = dis_x86.create64
-disass = dis_x86.disass64
-regname = dis_x86.regname64
+local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
+return {
+  create = dis_x86.create64,
+  disass = dis_x86.disass64,
+  regname = dis_x86.regname64
+}
 
 

+ 176 - 89
jni/LuaJIT-2.0.1/src/jit/dis_x86.lua → jni/LuaJIT-2.1/src/jit/dis_x86.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT x86/x64 disassembler module.
 -- LuaJIT x86/x64 disassembler module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -15,19 +15,20 @@
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
--- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
--- instructions.
+-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
+-- (VMX/SVM) instructions.
 --
 --
 -- Notes:
 -- Notes:
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
--- * The public API may change when more architectures are added.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 local type = type
 local type = type
 local sub, byte, format = string.sub, string.byte, string.format
 local sub, byte, format = string.sub, string.byte, string.format
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local lower, rep = string.lower, string.rep
 local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local tohex = bit.tohex
 
 
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 local map_opc1_32 = {
 local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 --Cx
 --Cx
-"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 --Dx
 --Dx
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
-  [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+  [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
 }, { __index = map_opc1_32 })
 }, { __index = map_opc1_32 })
 
 
@@ -112,12 +113,12 @@ local map_opc2 = {
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 --1x
 --1x
-"movupsXrm|movssXrm|movupdXrm|movsdXrm",
-"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
+"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movlpsXmr||movlpdXmr",
 "movlpsXmr||movlpdXmr",
-"unpcklpsXrm||unpcklpdXrm",
-"unpckhpsXrm||unpckhpdXrm",
+"unpcklpsXrvm||unpcklpdXrvm",
+"unpckhpsXrvm||unpckhpdXrvm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movhpsXmr||movhpdXmr",
 "movhpsXmr||movhpdXmr",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movapsXrm||movapdXrm",
 "movapsXrm||movapdXrm",
 "movapsXmr||movapdXmr",
 "movapsXmr||movapdXmr",
-"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 --5x
 --5x
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
-"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
-"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
-"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
-"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
-"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
+"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
+"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
+"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
+"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
-"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
-"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
+"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
 --6x
 --6x
-"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
-"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
-"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
-"||punpcklqdqXrm","||punpckhqdqXrm",
+"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
+"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
+"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
+"||punpcklqdqXrvm","||punpckhqdqXrvm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 --7x
 --7x
 "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
 "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
 "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
 "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
-"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 nil,nil,
 nil,nil,
-"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 --8x
 --8x
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 --Cx
 --Cx
 "xaddBmr","xaddVmr",
 "xaddBmr","xaddVmr",
-"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
-"pinsrwPrWmu","pextrwDrPmu",
-"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
+"pinsrwPrvWmu","pextrwDrPmu",
+"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 --Dx
 --Dx
-"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
-"paddqPrm","pmullwPrm",
+"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
+"paddqPrvm","pmullwPrvm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
-"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
-"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
+"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
 --Ex
 --Ex
-"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
-"pmulhuwPrm","pmulhwPrm",
+"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
+"pmulhuwPrvm","pmulhwPrvm",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
-"psubsbPrm","psubswPrm","pminswPrm","porPrm",
-"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
+"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
 --Fx
 --Fx
-"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
-"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
-"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
-"paddbPrm","paddwPrm","padddPrm","ud",
+"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
+"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
+"paddbPrvm","paddwPrvm","padddPrvm","ud",
 }
 }
 assert(map_opc2[255] == "ud")
 assert(map_opc2[255] == "ud")
 
 
@@ -208,46 +209,62 @@ assert(map_opc2[255] == "ud")
 local map_opc3 = {
 local map_opc3 = {
 ["38"] = { -- [66] 0f 38 xx
 ["38"] = { -- [66] 0f 38 xx
 --0x
 --0x
-[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
-"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
-"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
-nil,nil,nil,nil,
+[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
+"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
+"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
+"||permilpsXrvm","||permilpdXrvm",nil,nil,
 --1x
 --1x
 "||pblendvbXrma",nil,nil,nil,
 "||pblendvbXrma",nil,nil,nil,
-"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
-nil,nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
+"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 --2x
 --2x
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
-"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
-nil,nil,nil,nil,
+"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
+"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
 --3x
 --3x
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
-"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
-"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
-"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
+"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
+"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
 --4x
 --4x
-"||pmulddXrm","||phminposuwXrm",
+"||pmulddXrvm","||phminposuwXrm",nil,nil,
+nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+--5x
+[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
+[0x5a] = "||broadcasti128XrlXm",
+--7x
+[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
+--8x
+[0x8c] = "||pmaskmovXrvVSm",
+[0x8e] = "||pmaskmovVSmXvr",
 --Fx
 --Fx
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
 },
 },
 
 
 ["3a"] = { -- [66] 0f 3a xx
 ["3a"] = { -- [66] 0f 3a xx
 --0x
 --0x
-[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
-"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
-"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
+"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
+"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
 --1x
 --1x
 nil,nil,nil,nil,
 nil,nil,nil,nil,
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
-nil,nil,nil,nil,nil,nil,nil,nil,
+"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
+nil,nil,nil,nil,
 --2x
 --2x
-"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
+--3x
+[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
 --4x
 --4x
-[0x40] = "||dppsXrmu",
-[0x41] = "||dppdXrmu",
-[0x42] = "||mpsadbwXrmu",
+[0x40] = "||dppsXrvmu",
+[0x41] = "||dppdXrvmu",
+[0x42] = "||mpsadbwXrvmu",
+[0x46] = "||perm2i128Xrvmu",
+[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
+[0x4c] = "||pblendvbXrvmb",
 --6x
 --6x
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
@@ -354,17 +371,19 @@ local map_regs = {
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+  Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+	"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
 }
 }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 
 
 -- Maps for size names.
 -- Maps for size names.
 local map_sz2n = {
 local map_sz2n = {
-  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
 }
 }
 local map_sz2prefix = {
 local map_sz2prefix = {
   B = "byte", W = "word", D = "dword",
   B = "byte", W = "word", D = "dword",
   Q = "qword",
   Q = "qword",
-  M = "qword", X = "xword",
+  M = "qword", X = "xword", Y = "yword",
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
 }
 }
 
 
@@ -387,10 +406,13 @@ local function putop(ctx, text, operands)
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rex then
   if ctx.rex then
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
-	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
-    if t ~= "" then text = "rex."..t.." "..text end
+	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
+	      (ctx.vexl and "l" or "")
+    if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
+    if t ~= "" then text = ctx.rex.."."..t.." "..text
+    elseif ctx.rex == "vex" then text = "v"..text end
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-    ctx.rex = false
+    ctx.rex = false; ctx.vexl = false; ctx.vexv = false
   end
   end
   if ctx.seg then
   if ctx.seg then
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +427,7 @@ local function putop(ctx, text, operands)
   end
   end
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.mrm = false
   ctx.mrm = false
+  ctx.vexv = false
   ctx.start = pos
   ctx.start = pos
   ctx.imm = nil
   ctx.imm = nil
 end
 end
@@ -413,7 +436,7 @@ end
 local function clearprefixes(ctx)
 local function clearprefixes(ctx)
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-  ctx.rex = false; ctx.a32 = false
+  ctx.rex = false; ctx.a32 = false; ctx.vexl = false
 end
 end
 
 
 -- Fallback for incomplete opcodes at the end.
 -- Fallback for incomplete opcodes at the end.
@@ -450,9 +473,9 @@ end
 -- Process pattern string and generate the operands.
 -- Process pattern string and generate the operands.
 local function putpat(ctx, name, pat)
 local function putpat(ctx, name, pat)
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
-  local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+  local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
 
 
-  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+  -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
   for p in gmatch(pat, ".") do
   for p in gmatch(pat, ".") do
     local x = nil
     local x = nil
     if p == "V" or p == "U" then
     if p == "V" or p == "U" then
@@ -467,11 +490,13 @@ local function putpat(ctx, name, pat)
     elseif p == "B" then
     elseif p == "B" then
       sz = "B"
       sz = "B"
       regs = ctx.rex and map_regs.B64 or map_regs.B
       regs = ctx.rex and map_regs.B64 or map_regs.B
-    elseif match(p, "[WDQMXFG]") then
+    elseif match(p, "[WDQMXYFG]") then
       sz = p
       sz = p
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
       regs = map_regs[sz]
     elseif p == "P" then
     elseif p == "P" then
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
       regs = map_regs[sz]
     elseif p == "S" then
     elseif p == "S" then
       name = name..lower(sz)
       name = name..lower(sz)
@@ -484,6 +509,10 @@ local function putpat(ctx, name, pat)
       local imm = getimm(ctx, pos, 1); if not imm then return end
       local imm = getimm(ctx, pos, 1); if not imm then return end
       x = format("0x%02x", imm)
       x = format("0x%02x", imm)
       pos = pos+1
       pos = pos+1
+    elseif p == "b" then
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = regs[imm/16+1]
+      pos = pos+1
     elseif p == "w" then
     elseif p == "w" then
       local imm = getimm(ctx, pos, 2); if not imm then return end
       local imm = getimm(ctx, pos, 2); if not imm then return end
       x = format("0x%x", imm)
       x = format("0x%x", imm)
@@ -532,7 +561,7 @@ local function putpat(ctx, name, pat)
 	local lo = imm % 0x1000000
 	local lo = imm % 0x1000000
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
       else
       else
-	x = format("0x%08x", imm)
+	x = "0x"..tohex(imm)
       end
       end
     elseif p == "R" then
     elseif p == "R" then
       local r = byte(code, pos-1, pos-1)%8
       local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +645,13 @@ local function putpat(ctx, name, pat)
 	else
 	else
 	  x = "CR"..sp
 	  x = "CR"..sp
 	end
 	end
+      elseif p == "v" then
+	if ctx.vexv then
+	  x = regs[ctx.vexv+1]; ctx.vexv = false
+	end
       elseif p == "y" then x = "DR"..sp
       elseif p == "y" then x = "DR"..sp
       elseif p == "z" then x = "TR"..sp
       elseif p == "z" then x = "TR"..sp
+      elseif p == "l" then vexl = false
       elseif p == "t" then
       elseif p == "t" then
       else
       else
 	error("bad pattern `"..pat.."'")
 	error("bad pattern `"..pat.."'")
@@ -692,7 +726,7 @@ map_act = {
   B = putpat, W = putpat, D = putpat, Q = putpat,
   B = putpat, W = putpat, D = putpat, Q = putpat,
   V = putpat, U = putpat, T = putpat,
   V = putpat, U = putpat, T = putpat,
   M = putpat, X = putpat, P = putpat,
   M = putpat, X = putpat, P = putpat,
-  F = putpat, G = putpat,
+  F = putpat, G = putpat, Y = putpat,
 
 
   -- Collect prefixes.
   -- Collect prefixes.
   [":"] = function(ctx, name, pat)
   [":"] = function(ctx, name, pat)
@@ -753,15 +787,68 @@ map_act = {
 
 
   -- REX prefix.
   -- REX prefix.
   rex = function(ctx, name, pat)
   rex = function(ctx, name, pat)
-    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
-    ctx.rex = true
+    ctx.rex = "rex"
+  end,
+
+  -- VEX prefix.
+  vex = function(ctx, name, pat)
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
+    ctx.rex = "vex"
+    local pos = ctx.pos
+    if ctx.mrm then
+      ctx.mrm = nil
+      pos = pos-1
+    end
+    local b = byte(ctx.code, pos, pos)
+    if not b then return incomplete(ctx) end
+    pos = pos+1
+    if b < 128 then ctx.rexr = true end
+    local m = 1
+    if pat == "3" then
+      m = b%32; b = (b-m)/32
+      local nb = b%2; b = (b-nb)/2
+      if nb == 0 then ctx.rexb = true end
+      local nx = b%2; b = (b-nx)/2
+      if nx == 0 then ctx.rexx = true end
+      b = byte(ctx.code, pos, pos)
+      if not b then return incomplete(ctx) end
+      pos = pos+1
+      if b >= 128 then ctx.rexw = true end
+    end
+    ctx.pos = pos
+    local map
+    if m == 1 then map = map_opc2
+    elseif m == 2 then map = map_opc3["38"]
+    elseif m == 3 then map = map_opc3["3a"]
+    else return unknown(ctx) end
+    local p = b%4; b = (b-p)/4
+    if p == 1 then ctx.o16 = "o16"
+    elseif p == 2 then ctx.rep = "rep"
+    elseif p == 3 then ctx.rep = "repne" end
+    local l = b%2; b = (b-l)/2
+    if l ~= 0 then ctx.vexl = true end
+    ctx.vexv = (-1-b)%16
+    return dispatchmap(ctx, map)
   end,
   end,
 
 
   -- Special case for nop with REX prefix.
   -- Special case for nop with REX prefix.
   nop = function(ctx, name, pat)
   nop = function(ctx, name, pat)
     return dispatch(ctx, ctx.rex and pat or "nop")
     return dispatch(ctx, ctx.rex and pat or "nop")
   end,
   end,
+
+  -- Special case for 0F 77.
+  emms = function(ctx, name, pat)
+    if ctx.rex ~= "vex" then
+      return putop(ctx, "emms")
+    elseif ctx.vexl then
+      ctx.vexl = false
+      return putop(ctx, "zeroall")
+    else
+      return putop(ctx, "zeroupper")
+    end
+  end,
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -782,7 +869,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = (addr or 0) - 1
   ctx.addr = (addr or 0) - 1
@@ -796,8 +883,8 @@ local function create_(code, addr, out)
   return ctx
   return ctx
 end
 end
 
 
-local function create64_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create64(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.x64 = true
   ctx.x64 = true
   ctx.map1 = map_opc1_64
   ctx.map1 = map_opc1_64
   ctx.aregs = map_regs.Q
   ctx.aregs = map_regs.Q
@@ -805,32 +892,32 @@ local function create64_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
-local function disass64_(code, addr, out)
-  create64_(code, addr, out):disass()
+local function disass64(code, addr, out)
+  create64(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 8 then return map_regs.D[r+1] end
   if r < 8 then return map_regs.D[r+1] end
   return map_regs.X[r-7]
   return map_regs.X[r-7]
 end
 end
 
 
-local function regname64_(r)
+local function regname64(r)
   if r < 16 then return map_regs.Q[r+1] end
   if r < 16 then return map_regs.Q[r+1] end
   return map_regs.X[r-15]
   return map_regs.X[r-15]
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-create64 = create64_
-disass = disass_
-disass64 = disass64_
-regname = regname_
-regname64 = regname64_
+return {
+  create = create,
+  create64 = create64,
+  disass = disass,
+  disass64 = disass64,
+  regname = regname,
+  regname64 = regname64
+}
 
 

+ 21 - 14
jni/LuaJIT-2.0.1/src/jit/dump.lua → jni/LuaJIT-2.1/src/jit/dump.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT compiler dump module.
 -- LuaJIT compiler dump module.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -36,6 +36,7 @@
 --  * m  Dump the generated machine code.
 --  * m  Dump the generated machine code.
 --    x  Print each taken trace exit.
 --    x  Print each taken trace exit.
 --    X  Print each taken trace exit and the contents of all registers.
 --    X  Print each taken trace exit and the contents of all registers.
+--    a  Print the IR of aborted traces, too.
 --
 --
 -- The output format can be set with the following characters:
 -- The output format can be set with the following characters:
 --
 --
@@ -54,7 +55,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -62,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local bit = require("bit")
 local bit = require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
 local sub, gsub, format = string.sub, string.gsub, string.format
 local sub, gsub, format = string.sub, string.gsub, string.format
 local byte, char, rep = string.byte, string.char, string.rep
 local byte, char, rep = string.byte, string.char, string.rep
 local type, tostring = type, tostring
 local type, tostring = type, tostring
@@ -90,6 +91,7 @@ local function fillsymtab_tr(tr, nexit)
   end
   end
   for i=0,nexit-1 do
   for i=0,nexit-1 do
     local addr = traceexitstub(tr, i)
     local addr = traceexitstub(tr, i)
+    if addr < 0 then addr = addr + 2^32 end
     t[addr] = tostring(i)
     t[addr] = tostring(i)
   end
   end
   local addr = traceexitstub(tr, nexit)
   local addr = traceexitstub(tr, nexit)
@@ -103,7 +105,10 @@ local function fillsymtab(tr, nexit)
     local ircall = vmdef.ircall
     local ircall = vmdef.ircall
     for i=0,#ircall do
     for i=0,#ircall do
       local addr = ircalladdr(i)
       local addr = ircalladdr(i)
-      if addr ~= 0 then t[addr] = ircall[i] end
+      if addr ~= 0 then
+	if addr < 0 then addr = addr + 2^32 end
+	t[addr] = ircall[i]
+      end
     end
     end
   end
   end
   if nexitsym == 1000000 then -- Per-trace exit stubs.
   if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -117,6 +122,7 @@ local function fillsymtab(tr, nexit)
 	nexit = 1000000
 	nexit = 1000000
 	break
 	break
       end
       end
+      if addr < 0 then addr = addr + 2^32 end
       t[addr] = tostring(i)
       t[addr] = tostring(i)
     end
     end
     nexitsym = nexit
     nexitsym = nexit
@@ -135,6 +141,7 @@ local function dump_mcode(tr)
   local mcode, addr, loop = tracemc(tr)
   local mcode, addr, loop = tracemc(tr)
   if not mcode then return end
   if not mcode then return end
   if not disass then disass = require("jit.dis_"..jit.arch) end
   if not disass then disass = require("jit.dis_"..jit.arch) end
+  if addr < 0 then addr = addr + 2^32 end
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   local ctx = disass.create(mcode, addr, dumpwrite)
   local ctx = disass.create(mcode, addr, dumpwrite)
   ctx.hexdump = 0
   ctx.hexdump = 0
@@ -269,8 +276,7 @@ local litname = {
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
     local s = irtype[band(mode, 31)]
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
     s = irtype[band(shr(mode, 5), 31)].."."..s
-    if band(mode, 0x400) ~= 0 then s = s.." trunc"
-    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    if band(mode, 0x800) ~= 0 then s = s.." sext" end
     local c = shr(mode, 14)
     local c = shr(mode, 14)
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     t[mode] = s
     t[mode] = s
@@ -279,6 +285,8 @@ local litname = {
   ["FLOAD "] = vmdef.irfield,
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
   ["FPMATH"] = vmdef.irfpm,
+  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 }
 
 
 local function ctlsub(c)
 local function ctlsub(c)
@@ -546,10 +554,8 @@ local function dump_trace(what, tr, func, pc, otr, oex)
     out:write("---- TRACE ", tr, " ", what)
     out:write("---- TRACE ", tr, " ", what)
     if otr then out:write(" ", otr, "/", oex) end
     if otr then out:write(" ", otr, "/", oex) end
     out:write(" ", fmtfunc(func, pc), "\n")
     out:write(" ", fmtfunc(func, pc), "\n")
-    recprefix = ""
   elseif what == "stop" or what == "abort" then
   elseif what == "stop" or what == "abort" then
     out:write("---- TRACE ", tr, " ", what)
     out:write("---- TRACE ", tr, " ", what)
-    recprefix = nil
     if what == "abort" then
     if what == "abort" then
       out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
       out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
     else
     else
@@ -565,6 +571,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
     end
     end
     if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
     if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
   else
   else
+    if what == "flush" then symtab, nexitsym = {}, 0 end
     out:write("---- TRACE ", what, "\n\n")
     out:write("---- TRACE ", what, "\n\n")
   end
   end
   out:flush()
   out:flush()
@@ -608,7 +615,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
       end
       end
     else
     else
       for i=1,ngpr do
       for i=1,ngpr do
-	out:write(format(" %08x", regs[i]))
+	out:write(" ", tohex(regs[i]))
 	if i % 8 == 0 then out:write("\n") end
 	if i % 8 == 0 then out:write("\n") end
       end
       end
     end
     end
@@ -692,9 +699,9 @@ local function dumpon(opt, outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
 

+ 310 - 0
jni/LuaJIT-2.1/src/jit/p.lua

@@ -0,0 +1,310 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module is a simple command line interface to the built-in
+-- low-overhead profiler of LuaJIT.
+--
+-- The lower-level API of the profiler is accessible via the "jit.profile"
+-- module or the luaJIT_profile_* C API.
+--
+-- Example usage:
+--
+--   luajit -jp myapp.lua
+--   luajit -jp=s myapp.lua
+--   luajit -jp=-s myapp.lua
+--   luajit -jp=vl myapp.lua
+--   luajit -jp=G,profile.txt myapp.lua
+--
+-- The following dump features are available:
+--
+--   f  Stack dump: function name, otherwise module:line. Default mode.
+--   F  Stack dump: ditto, but always prepend module.
+--   l  Stack dump: module:line.
+--   <number> stack dump depth (callee < caller). Default: 1.
+--   -<number> Inverse stack dump depth (caller > callee).
+--   s  Split stack dump after first stack level. Implies abs(depth) >= 2.
+--   p  Show full path for module names.
+--   v  Show VM states. Can be combined with stack dumps, e.g. vf or fv.
+--   z  Show zones. Can be combined with stack dumps, e.g. zf or fz.
+--   r  Show raw sample counts. Default: show percentages.
+--   a  Annotate excerpts from source code files.
+--   A  Annotate complete source code files.
+--   G  Produce raw output suitable for graphical tools (e.g. flame graphs).
+--   m<number> Minimum sample percentage to be shown. Default: 3.
+--   i<number> Sampling interval in milliseconds. Default: 10.
+--
+----------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+local profile = require("jit.profile")
+local vmdef = require("jit.vmdef")
+local math = math
+local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
+local sort, format = table.sort, string.format
+local stdout = io.stdout
+local zone -- Load jit.zone module on demand.
+
+-- Output file handle.
+local out
+
+------------------------------------------------------------------------------
+
+local prof_ud
+local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
+local prof_ann, prof_count1, prof_count2, prof_samples
+
+local map_vmmode = {
+  N = "Compiled",
+  I = "Interpreted",
+  C = "C code",
+  G = "Garbage Collector",
+  J = "JIT Compiler",
+}
+
+-- Profiler callback.
+local function prof_cb(th, samples, vmmode)
+  prof_samples = prof_samples + samples
+  local key_stack, key_stack2, key_state
+  -- Collect keys for sample.
+  if prof_states then
+    if prof_states == "v" then
+      key_state = map_vmmode[vmmode] or vmmode
+    else
+      key_state = zone:get() or "(none)"
+    end
+  end
+  if prof_fmt then
+    key_stack = profile.dumpstack(th, prof_fmt, prof_depth)
+    key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x)
+      return vmdef.ffnames[tonumber(x)]
+    end)
+    if prof_split == 2 then
+      local k1, k2 = key_stack:match("(.-) [<>] (.*)")
+      if k2 then key_stack, key_stack2 = k1, k2 end
+    elseif prof_split == 3 then
+      key_stack2 = profile.dumpstack(th, "l", 1)
+    end
+  end
+  -- Order keys.
+  local k1, k2
+  if prof_split == 1 then
+    if key_state then
+      k1 = key_state
+      if key_stack then k2 = key_stack end
+    end
+  elseif key_stack then
+    k1 = key_stack
+    if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end
+  end
+  -- Coalesce samples in one or two levels.
+  if k1 then
+    local t1 = prof_count1
+    t1[k1] = (t1[k1] or 0) + samples
+    if k2 then
+      local t2 = prof_count2
+      local t3 = t2[k1]
+      if not t3 then t3 = {}; t2[k1] = t3 end
+      t3[k2] = (t3[k2] or 0) + samples
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Show top N list.
+local function prof_top(count1, count2, samples, indent)
+  local t, n = {}, 0
+  for k, v in pairs(count1) do
+    n = n + 1
+    t[n] = k
+  end
+  sort(t, function(a, b) return count1[a] > count1[b] end)
+  for i=1,n do
+    local k = t[i]
+    local v = count1[k]
+    local pct = floor(v*100/samples + 0.5)
+    if pct < prof_min then break end
+    if not prof_raw then
+      out:write(format("%s%2d%%  %s\n", indent, pct, k))
+    elseif prof_raw == "r" then
+      out:write(format("%s%5d  %s\n", indent, v, k))
+    else
+      out:write(format("%s %d\n", k, v))
+    end
+    if count2 then
+      local r = count2[k]
+      if r then
+	prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and "  -- " or
+			    (prof_depth < 0 and "  -> " or "  <- "))
+      end
+    end
+  end
+end
+
+-- Annotate source code
+local function prof_annotate(count1, samples)
+  local files = {}
+  local ms = 0
+  for k, v in pairs(count1) do
+    local pct = floor(v*100/samples + 0.5)
+    ms = math.max(ms, v)
+    if pct >= prof_min then
+      local file, line = k:match("^(.*):(%d+)$")
+      local fl = files[file]
+      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
+      line = tonumber(line)
+      fl[line] = prof_raw and v or pct
+    end
+  end
+  sort(files)
+  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
+  if prof_raw then
+    local n = math.max(5, math.ceil(math.log10(ms)))
+    fmtv = "%"..n.."d | %s\n"
+    fmtn = (" "):rep(n).." | %s\n"
+  end
+  local ann = prof_ann
+  for _, file in ipairs(files) do
+    local f0 = file:byte()
+    if f0 == 40 or f0 == 91 then
+      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
+      break
+    end
+    local fp, err = io.open(file)
+    if not fp then
+      out:write(format("====== ERROR: %s: %s\n", file, err))
+      break
+    end
+    out:write(format("\n====== %s ======\n", file))
+    local fl = files[file]
+    local n, show = 1, false
+    if ann ~= 0 then
+      for i=1,ann do
+	if fl[i] then show = true; out:write("@@ 1 @@\n"); break end
+      end
+    end
+    for line in fp:lines() do
+      if line:byte() == 27 then
+	out:write("[Cannot annotate bytecode file]\n")
+	break
+      end
+      local v = fl[n]
+      if ann ~= 0 then
+	local v2 = fl[n+ann]
+	if show then
+	  if v2 then show = n+ann elseif v then show = n
+	  elseif show+ann < n then show = false end
+	elseif v2 then
+	  show = n+ann
+	  out:write(format("@@ %d @@\n", n))
+	end
+	if not show then goto next end
+      end
+      if v then
+	out:write(format(fmtv, v, line))
+      else
+	out:write(format(fmtn, line))
+      end
+    ::next::
+      n = n + 1
+    end
+    fp:close()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Finish profiling and dump result.
+local function prof_finish()
+  if prof_ud then
+    profile.stop()
+    local samples = prof_samples
+    if samples == 0 then
+      if prof_raw ~= true then out:write("[No samples collected]\n") end
+      return
+    end
+    if prof_ann then
+      prof_annotate(prof_count1, samples)
+    else
+      prof_top(prof_count1, prof_count2, samples, "")
+    end
+    prof_count1 = nil
+    prof_count2 = nil
+    prof_ud = nil
+  end
+end
+
+-- Start profiling.
+local function prof_start(mode)
+  local interval = ""
+  mode = mode:gsub("i%d*", function(s) interval = s; return "" end)
+  prof_min = 3
+  mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end)
+  prof_depth = 1
+  mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end)
+  local m = {}
+  for c in mode:gmatch(".") do m[c] = c end
+  prof_states = m.z or m.v
+  if prof_states == "z" then zone = require("jit.zone") end
+  local scope = m.l or m.f or m.F or (prof_states and "" or "f")
+  local flags = (m.p or "")
+  prof_raw = m.r
+  if m.s then
+    prof_split = 2
+    if prof_depth == -1 or m["-"] then prof_depth = -2
+    elseif prof_depth == 1 then prof_depth = 2 end
+  elseif mode:find("[fF].*l") then
+    scope = "l"
+    prof_split = 3
+  else
+    prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
+  end
+  prof_ann = m.A and 0 or (m.a and 3)
+  if prof_ann then
+    scope = "l"
+    prof_fmt = "pl"
+    prof_split = 0
+    prof_depth = 1
+  elseif m.G and scope ~= "" then
+    prof_fmt = flags..scope.."Z;"
+    prof_depth = -100
+    prof_raw = true
+    prof_min = 0
+  elseif scope == "" then
+    prof_fmt = false
+  else
+    local sc = prof_split == 3 and m.f or m.F or scope
+    prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
+  end
+  prof_count1 = {}
+  prof_count2 = {}
+  prof_samples = 0
+  profile.start(scope:lower()..interval, prof_cb)
+  prof_ud = newproxy(true)
+  getmetatable(prof_ud).__gc = prof_finish
+end
+
+------------------------------------------------------------------------------
+
+local function start(mode, outfile)
+  if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stdout
+  end
+  prof_start(mode or "f")
+end
+
+-- Public module functions.
+return {
+  start = start, -- For -j command line option.
+  stop = prof_finish
+}
+

+ 10 - 7
jni/LuaJIT-2.0.1/src/jit/v.lua → jni/LuaJIT-2.1/src/jit/v.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- Verbose mode of the LuaJIT compiler.
 -- Verbose mode of the LuaJIT compiler.
 --
 --
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -59,7 +59,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20001, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
       if ltype == "interpreter" then
       if ltype == "interpreter" then
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	  tr, startex, startloc))
 	  tr, startex, startloc))
+      elseif ltype == "stitch" then
+	out:write(format("[TRACE %3s %s%s %s %s]\n",
+	  tr, startex, startloc, ltype, fmtfunc(func, pc)))
       elseif link == tr or link == 0 then
       elseif link == tr or link == 0 then
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	  tr, startex, startloc, ltype))
 	  tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
 

+ 45 - 0
jni/LuaJIT-2.1/src/jit/zone.lua

@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler zones.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module implements a simple hierarchical zone model.
+--
+-- Example usage:
+--
+--   local zone = require("jit.zone")
+--   zone("AI")
+--   ...
+--     zone("A*")
+--     ...
+--     print(zone:get()) --> "A*"
+--     ...
+--     zone()
+--   ...
+--   print(zone:get()) --> "AI"
+--   ...
+--   zone()
+--
+----------------------------------------------------------------------------
+
+local remove = table.remove
+
+return setmetatable({
+  flush = function(t)
+    for i=#t,1,-1 do t[i] = nil end
+  end,
+  get = function(t)
+    return t[#t]
+  end
+}, {
+  __call = function(t, zone)
+    if zone then
+      t[#t+1] = zone
+    else
+      return (assert(remove(t), "empty zone stack"))
+    end
+  end
+})
+

+ 0 - 0
jni/LuaJIT-2.0.1/src/lauxlib.h → jni/LuaJIT-2.1/src/lauxlib.h


+ 2 - 2
jni/LuaJIT-2.0.1/src/lib_aux.c → jni/LuaJIT-2.1/src/lib_aux.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Auxiliary library for the Lua/C API.
 ** Auxiliary library for the Lua/C API.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -302,7 +302,7 @@ static int panic(lua_State *L)
 
 
 #ifdef LUAJIT_USE_SYSMALLOC
 #ifdef LUAJIT_USE_SYSMALLOC
 
 
-#if LJ_64
+#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
 #error "Must use builtin allocator for 64 bit target"
 #error "Must use builtin allocator for 64 bit target"
 #endif
 #endif
 
 

+ 34 - 53
jni/LuaJIT-2.0.1/src/lib_base.c → jni/LuaJIT-2.1/src/lib_base.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Base and coroutine library.
 ** Base and coroutine library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -32,6 +32,7 @@
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_char.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* -- Base library: checks ------------------------------------------------ */
 /* -- Base library: checks ------------------------------------------------ */
@@ -86,10 +87,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
   cTValue *mo = lj_meta_lookup(L, o, mm);
   cTValue *mo = lj_meta_lookup(L, o, mm);
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
     L->top = o+1;  /* Only keep one argument. */
     L->top = o+1;  /* Only keep one argument. */
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
     return FFH_TAILCALL;
   } else {
   } else {
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+    if (LJ_FR2) { copyTV(L, o-1, o); o--; }
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     return FFH_RES(3);
     return FFH_RES(3);
@@ -100,7 +102,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
 #endif
 #endif
 
 
 LJLIB_PUSH(lastcl)
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(pairs)
+LJLIB_ASM(pairs)		LJLIB_REC(xpairs 0)
 {
 {
   return ffh_pairs(L, MM_pairs);
   return ffh_pairs(L, MM_pairs);
 }
 }
@@ -113,7 +115,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux)	LJLIB_REC(.)
 }
 }
 
 
 LJLIB_PUSH(lastcl)
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(ipairs)		LJLIB_REC(.)
+LJLIB_ASM(ipairs)		LJLIB_REC(xpairs 1)
 {
 {
   return ffh_pairs(L, MM_ipairs);
   return ffh_pairs(L, MM_ipairs);
 }
 }
@@ -131,11 +133,11 @@ LJLIB_ASM(setmetatable)		LJLIB_REC(.)
     lj_err_caller(L, LJ_ERR_PROTMT);
     lj_err_caller(L, LJ_ERR_PROTMT);
   setgcref(t->metatable, obj2gco(mt));
   setgcref(t->metatable, obj2gco(mt));
   if (mt) { lj_gc_objbarriert(L, t, mt); }
   if (mt) { lj_gc_objbarriert(L, t, mt); }
-  settabV(L, L->base-1, t);
+  settabV(L, L->base-1-LJ_FR2, t);
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
-LJLIB_CF(getfenv)
+LJLIB_CF(getfenv)		LJLIB_REC(.)
 {
 {
   GCfunc *fn;
   GCfunc *fn;
   cTValue *o = L->base;
   cTValue *o = L->base;
@@ -144,6 +146,7 @@ LJLIB_CF(getfenv)
     o = lj_debug_frame(L, level, &level);
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   }
   fn = &gcval(o)->fn;
   fn = &gcval(o)->fn;
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +168,7 @@ LJLIB_CF(setfenv)
     o = lj_debug_frame(L, level, &level);
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   }
   fn = &gcval(o)->fn;
   fn = &gcval(o)->fn;
   if (!isluafunc(fn))
   if (!isluafunc(fn))
@@ -257,7 +261,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   if (base == 10) {
   if (base == 10) {
     TValue *o = lj_lib_checkany(L, 1);
     TValue *o = lj_lib_checkany(L, 1);
     if (lj_strscan_numberobj(o)) {
     if (lj_strscan_numberobj(o)) {
-      copyTV(L, L->base-1, o);
+      copyTV(L, L->base-1-LJ_FR2, o);
       return FFH_RES(1);
       return FFH_RES(1);
     }
     }
 #if LJ_HASFFI
 #if LJ_HASFFI
@@ -270,11 +274,11 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	  int32_t i;
 	  int32_t i;
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
-	  setintV(L->base-1, i);
+	  setintV(L->base-1-LJ_FR2, i);
 	  return FFH_RES(1);
 	  return FFH_RES(1);
 	}
 	}
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
-		       (uint8_t *)&(L->base-1)->n, o, 0);
+		       (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
 	return FFH_RES(1);
 	return FFH_RES(1);
       }
       }
     }
     }
@@ -290,45 +294,29 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
       while (lj_char_isspace((unsigned char)(*ep))) ep++;
       while (lj_char_isspace((unsigned char)(*ep))) ep++;
       if (*ep == '\0') {
       if (*ep == '\0') {
 	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
 	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-	  setintV(L->base-1, (int32_t)ul);
+	  setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	else
 	else
-	  setnumV(L->base-1, (lua_Number)ul);
+	  setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
 	return FFH_RES(1);
 	return FFH_RES(1);
       }
       }
     }
     }
   }
   }
-  setnilV(L->base-1);
+  setnilV(L->base-1-LJ_FR2);
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
-LJLIB_PUSH("nil")
-LJLIB_PUSH("false")
-LJLIB_PUSH("true")
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 {
 {
   TValue *o = lj_lib_checkany(L, 1);
   TValue *o = lj_lib_checkany(L, 1);
   cTValue *mo;
   cTValue *mo;
   L->top = o+1;  /* Only keep one argument. */
   L->top = o+1;  /* Only keep one argument. */
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
     return FFH_TAILCALL;
-  } else {
-    GCstr *s;
-    if (tvisnumber(o)) {
-      s = lj_str_fromnumber(L, o);
-    } else if (tvispri(o)) {
-      s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
-    } else {
-      if (tvisfunc(o) && isffunc(funcV(o)))
-	lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
-      else
-	lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
-      /* Note: lua_pushfstring calls the GC which may invalidate o. */
-      s = strV(L->top-1);
-    }
-    setstrV(L, L->base-1, s);
-    return FFH_RES(1);
   }
   }
+  lj_gc_check(L);
+  setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
+  return FFH_RES(1);
 }
 }
 
 
 /* -- Base library: throw and catch errors -------------------------------- */
 /* -- Base library: throw and catch errors -------------------------------- */
@@ -440,20 +428,20 @@ LJLIB_CF(dofile)
 
 
 LJLIB_CF(gcinfo)
 LJLIB_CF(gcinfo)
 {
 {
-  setintV(L->top++, (G(L)->gc.total >> 10));
+  setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
   return 1;
   return 1;
 }
 }
 
 
 LJLIB_CF(collectgarbage)
 LJLIB_CF(collectgarbage)
 {
 {
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
-    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
   int32_t data = lj_lib_optint(L, 2, 0);
   int32_t data = lj_lib_optint(L, 2, 0);
   if (opt == LUA_GCCOUNT) {
   if (opt == LUA_GCCOUNT) {
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
   } else {
   } else {
     int res = lua_gc(L, opt, data);
     int res = lua_gc(L, opt, data);
-    if (opt == LUA_GCSTEP)
+    if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
       setboolV(L->top, res);
       setboolV(L->top, res);
     else
     else
       setintV(L->top, res);
       setintV(L->top, res);
@@ -506,21 +494,13 @@ LJLIB_CF(print)
   }
   }
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {
+    cTValue *o = &L->base[i];
+    char buf[STRFMT_MAXBUF_NUM];
     const char *str;
     const char *str;
     size_t size;
     size_t size;
-    cTValue *o = &L->base[i];
-    if (shortcut && tvisstr(o)) {
-      str = strVdata(o);
-      size = strV(o)->len;
-    } else if (shortcut && tvisint(o)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(o));
-      size = (size_t)(buf+LJ_STR_INTBUF-p);
-      str = p;
-    } else if (shortcut && tvisnum(o)) {
-      char buf[LJ_STR_NUMBUF];
-      size = lj_str_bufnum(buf, o);
-      str = buf;
+    MSize len;
+    if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
+      size = len;
     } else {
     } else {
       copyTV(L, L->top+1, o);
       copyTV(L, L->top+1, o);
       copyTV(L, L->top, L->top-1);
       copyTV(L, L->top, L->top-1);
@@ -558,7 +538,7 @@ LJLIB_CF(coroutine_status)
   if (co == L) s = "running";
   if (co == L) s = "running";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status != 0) s = "dead";
   else if (co->status != 0) s = "dead";
-  else if (co->base > tvref(co->stack)+1) s = "normal";
+  else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
   else if (co->top == co->base) s = "dead";
   else if (co->top == co->base) s = "dead";
   else s = "suspended";
   else s = "suspended";
   lua_pushstring(L, s);
   lua_pushstring(L, s);
@@ -600,8 +580,8 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
       (co->status == 0 && co->top == co->base)) {
       (co->status == 0 && co->top == co->base)) {
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     if (wrap) lj_err_caller(L, em);
     if (wrap) lj_err_caller(L, em);
-    setboolV(L->base-1, 0);
-    setstrV(L, L->base, lj_err_str(L, em));
+    setboolV(L->base-1-LJ_FR2, 0);
+    setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
     return FFH_RES(2);
   }
   }
   lj_state_growstack(co, (MSize)(L->top - L->base));
   lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -642,9 +622,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
 
 
 LJLIB_CF(coroutine_wrap)
 LJLIB_CF(coroutine_wrap)
 {
 {
+  GCfunc *fn;
   lj_cf_coroutine_create(L);
   lj_cf_coroutine_create(L);
-  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
-  setpc_wrap_aux(L, funcV(L->top-1));
+  fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, fn);
   return 1;
   return 1;
 }
 }
 
 

+ 180 - 0
jni/LuaJIT-2.1/src/lib_bit.c

@@ -0,0 +1,180 @@
+/*
+** Bit manipulation library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_bit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_bit
+
+#if LJ_HASFFI
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
+{
+  GCcdata *cd = lj_cdata_new_(L, id, 8);
+  *(uint64_t *)cdataptr(cd) = x;
+  setcdataV(L, L->base-1-LJ_FR2, cd);
+  return FFH_RES(1);
+}
+#else
+static int32_t bit_checkbit(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;
+  if (!(o < L->top && lj_strscan_numberobj(o)))
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);
+    return i;
+  }
+}
+#endif
+
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
+  return FFH_RES(1);
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, ~x) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
+{
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+  if (id) {
+    x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
+    return bit_result64(L, id, x);
+  }
+  if (id2) setintV(L->base+1, sh);
+  return FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  bit_checkbit(L, 2);
+  return FFH_RETRY;
+#endif
+}
+LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
+LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
+LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
+LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
+
+LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  TValue *o = L->base, *top = L->top;
+  int i = 0;
+  do { lj_carith_check64(L, ++i, &id); } while (++o < top);
+  if (id) {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_get(cts, id);
+    int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
+    uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
+    o = L->base;
+    do {
+      lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
+      if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+    } while (++o < top);
+    return bit_result64(L, id, y);
+  }
+  return FFH_RETRY;
+#else
+  int i = 0;
+  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
+  return FFH_RETRY;
+#endif
+}
+LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
+LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
+
+/* ------------------------------------------------------------------------ */
+
+LJLIB_CF(bit_tohex)		LJLIB_REC(.)
+{
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t b = lj_carith_check64(L, 1, &id);
+  int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
+				  (int32_t)lj_carith_check64(L, 2, &id2);
+#else
+  uint32_t b = (uint32_t)bit_checkbit(L, 1);
+  int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
+#endif
+  SBuf *sb = lj_buf_tmp_(L);
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+#if LJ_HASFFI
+  if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
+#else
+  if (n < 8) b &= (1u << 4*n)-1;
+#endif
+  sb = lj_strfmt_putfxint(sb, sf, b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_bit(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
+  return 1;
+}
+

+ 2 - 2
jni/LuaJIT-2.0.1/src/lib_debug.c → jni/LuaJIT-2.1/src/lib_debug.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Debug library.
 ** Debug library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
   return 1;
   return 1;
 }
 }
 
 
-LJLIB_CF(debug_getmetatable)
+LJLIB_CF(debug_getmetatable)	LJLIB_REC(.)
 {
 {
   lj_lib_checkany(L, 1);
   lj_lib_checkany(L, 1);
   if (!lua_getmetatable(L, 1)) {
   if (!lua_getmetatable(L, 1)) {

+ 37 - 15
jni/LuaJIT-2.0.1/src/lib_ffi.c → jni/LuaJIT-2.1/src/lib_ffi.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI library.
 ** FFI library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lib_ffi_c
 #define lib_ffi_c
@@ -29,6 +29,7 @@
 #include "lj_ccall.h"
 #include "lj_ccall.h"
 #include "lj_ccallback.h"
 #include "lj_ccallback.h"
 #include "lj_clib.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
@@ -136,7 +137,8 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
 	return 0;
 	return 0;
       }
       }
     }
     }
-    tv = L->top-1;
+    copyTV(L, base, L->top);
+    tv = L->top-1-LJ_FR2;
   }
   }
   return lj_meta_tailcall(L, tv);
   return lj_meta_tailcall(L, tv);
 }
 }
@@ -317,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
       }
       }
     }
     }
   }
   }
-  lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+  lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
 checkgc:
 checkgc:
   lj_gc_check(L);
   lj_gc_check(L);
   return 1;
   return 1;
@@ -506,7 +508,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
   if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
   if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
     cd = lj_cdata_new(cts, id, sz);
     cd = lj_cdata_new(cts, id, sz);
   else
   else
-    cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+    cd = lj_cdata_newv(L, id, sz, ctype_align(info));
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
@@ -557,6 +559,31 @@ LJLIB_CF(ffi_typeof)	LJLIB_REC(.)
   return 1;
   return 1;
 }
 }
 
 
+/*
+** Internal and unsupported API.
+** Takes a numeric ctype ID as argument 1. For an ID in the range
+** 0 < id < cts->top it returns a new table with the integer field "info",
+** plus "size", "sib" and "name" when those are set for the type.
+** Returns no values for an ID outside that range.
+*/
+LJLIB_CF(ffi_typeinfo)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = (CTypeID)ffi_checkint(L, 1);
+  if (id > 0 && id < cts->top) {
+    CType *ct = ctype_get(cts, id);
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);  /* The table just pushed; it is also the result. */
+    setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
+    /* "size" is omitted for types whose size is CTSIZE_INVALID. */
+    if (ct->size != CTSIZE_INVALID)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
+    /* "sib" is omitted when zero. */
+    if (ct->sib)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+    /* "name" is omitted for types without a name reference. */
+    if (gcref(ct->name)) {
+      GCstr *s = gco2str(gcref(ct->name));
+      setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+    }
+    lj_gc_check(L);
+    return 1;
+  }
+  return 0;
+}
+
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 {
 {
   CTState *cts = ctype_cts(L);
   CTState *cts = ctype_cts(L);
@@ -576,7 +603,7 @@ LJLIB_CF(ffi_istype)	LJLIB_REC(.)
       if (ctype_ispointer(ct1->info))
       if (ctype_ispointer(ct1->info))
 	b = lj_cconv_compatptr(cts, ct1, ct2, CCF_IGNQUAL);
 	b = lj_cconv_compatptr(cts, ct1, ct2, CCF_IGNQUAL);
       else if (ctype_isnum(ct1->info) || ctype_isvoid(ct1->info))
       else if (ctype_isnum(ct1->info) || ctype_isvoid(ct1->info))
-	b = (((ct1->info ^ ct2->info) & ~CTF_QUAL) == 0);
+	b = (((ct1->info ^ ct2->info) & ~(CTF_QUAL|CTF_LONG)) == 0);
     } else if (ctype_isstruct(ct1->info) && ctype_isptr(ct2->info) &&
     } else if (ctype_isstruct(ct1->info) && ctype_isptr(ct2->info) &&
 	       ct1 == ctype_rawchild(cts, ct2)) {
 	       ct1 == ctype_rawchild(cts, ct2)) {
       b = 1;
       b = 1;
@@ -657,7 +684,7 @@ LJLIB_CF(ffi_string)	LJLIB_REC(.)
   TValue *o = lj_lib_checkany(L, 1);
   TValue *o = lj_lib_checkany(L, 1);
   const char *p;
   const char *p;
   size_t len;
   size_t len;
-  if (o+1 < L->top) {
+  if (o+1 < L->top && !tvisnil(o+1)) {
     len = (size_t)ffi_checkint(L, 2);
     len = (size_t)ffi_checkint(L, 2);
     lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, o,
     lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, o,
 		   CCF_ARG(1));
 		   CCF_ARG(1));
@@ -724,6 +751,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
 #endif
 #endif
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
+#if LJ_GC64
+  case H_(9e89d2c9,13c83c92): b = 1; break;  /* gc64 */
+#endif
   default:
   default:
     break;
     break;
   }
   }
@@ -767,19 +797,11 @@ LJLIB_CF(ffi_gc)	LJLIB_REC(.)
   GCcdata *cd = ffi_checkcdata(L, 1);
   GCcdata *cd = ffi_checkcdata(L, 1);
   TValue *fin = lj_lib_checkany(L, 2);
   TValue *fin = lj_lib_checkany(L, 2);
   CTState *cts = ctype_cts(L);
   CTState *cts = ctype_cts(L);
-  GCtab *t = cts->finalizer;
   CType *ct = ctype_raw(cts, cd->ctypeid);
   CType *ct = ctype_raw(cts, cd->ctypeid);
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
 	ctype_isrefarray(ct->info)))
 	ctype_isrefarray(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  if (gcref(t->metatable)) {  /* Update finalizer table, if still enabled. */
-    copyTV(L, lj_tab_set(L, t, L->base), fin);
-    lj_gc_anybarriert(L, t);
-    if (!tvisnil(fin))
-      cd->marked |= LJ_GC_CDATA_FIN;
-    else
-      cd->marked &= ~LJ_GC_CDATA_FIN;
-  }
+  lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
   L->top = L->base+1;  /* Pass through the cdata object. */
   L->top = L->base+1;  /* Pass through the cdata object. */
   return 1;
   return 1;
 }
 }

+ 1 - 1
jni/LuaJIT-2.0.1/src/lib_init.c → jni/LuaJIT-2.1/src/lib_init.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Library initialization.
 ** Library initialization.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major parts taken verbatim from the Lua interpreter.
 ** Major parts taken verbatim from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

+ 30 - 24
jni/LuaJIT-2.0.1/src/lib_io.c → jni/LuaJIT-2.1/src/lib_io.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** I/O library.
 ** I/O library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -17,9 +17,12 @@
 #include "lualib.h"
 #include "lualib.h"
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_state.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
@@ -83,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
   IOFileUD *iof = io_file_new(L);
   IOFileUD *iof = io_file_new(L);
   iof->fp = fopen(fname, mode);
   iof->fp = fopen(fname, mode);
   if (iof->fp == NULL)
   if (iof->fp == NULL)
-    luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+    luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
   return iof;
   return iof;
 }
 }
 
 
@@ -96,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
     int stat = -1;
     int stat = -1;
 #if LJ_TARGET_POSIX
 #if LJ_TARGET_POSIX
     stat = pclose(iof->fp);
     stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
     stat = _pclose(iof->fp);
     stat = _pclose(iof->fp);
 #else
 #else
     lua_assert(0);
     lua_assert(0);
@@ -144,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   char *buf;
   char *buf;
   for (;;) {
   for (;;) {
-    buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    buf = lj_buf_tmp(L, m);
     if (fgets(buf+n, m-n, fp) == NULL) break;
     if (fgets(buf+n, m-n, fp) == NULL) break;
     n += (MSize)strlen(buf+n);
     n += (MSize)strlen(buf+n);
     ok |= n;
     ok |= n;
@@ -152,6 +155,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
     if (n >= m - 64) m += m;
     if (n >= m - 64) m += m;
   }
   }
   setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
   setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+  lj_gc_check(L);
   return (int)ok;
   return (int)ok;
 }
 }
 
 
@@ -159,10 +163,11 @@ static void io_file_readall(lua_State *L, FILE *fp)
 {
 {
   MSize m, n;
   MSize m, n;
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     if (n != m) {
     if (n != m) {
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+      lj_gc_check(L);
       return;
       return;
     }
     }
   }
   }
@@ -171,9 +176,10 @@ static void io_file_readall(lua_State *L, FILE *fp)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 {
 {
   if (m) {
   if (m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     MSize n = (MSize)fread(buf, 1, m, fp);
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+    lj_gc_check(L);
     return (n > 0 || m == 0);
     return (n > 0 || m == 0);
   } else {
   } else {
     int c = getc(fp);
     int c = getc(fp);
@@ -226,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
   cTValue *tv;
   cTValue *tv;
   int status = 1;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
   for (tv = L->base+start; tv < L->top; tv++) {
-    if (tvisstr(tv)) {
-      MSize len = strV(tv)->len;
-      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
-    } else if (tvisint(tv)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(tv));
-      size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
-      status = status && (fwrite(p, 1, len, fp) == len);
-    } else if (tvisnum(tv)) {
-      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
-    } else {
+    char buf[STRFMT_MAXBUF_NUM];
+    MSize len;
+    const char *p = lj_strfmt_wstrnum(buf, tv, &len);
+    if (!p)
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
-    }
+    status = status && (fwrite(p, 1, len, fp) == len);
   }
   }
   if (LJ_52 && status) {
   if (LJ_52 && status) {
     L->top = L->base+1;
     L->top = L->base+1;
@@ -274,6 +273,15 @@ static int io_file_iter(lua_State *L)
   return n;
   return n;
 }
 }
 
 
+/*
+** Shared tail of io.lines() and file:lines(): pushes an io_file_iter closure
+** that captures every value currently on the stack (L->base up to L->top)
+** as an upvalue. Raises LJ_ERR_UNPACK if there are more than LJ_MAX_UPVAL
+** values. Returns 1 result: the closure.
+*/
+static int io_file_lines(lua_State *L)
+{
+  int n = (int)(L->top - L->base);
+  /* Refuse to create a closure with more upvalues than allowed. */
+  if (n > LJ_MAX_UPVAL)
+    lj_err_caller(L, LJ_ERR_UNPACK);
+  lua_pushcclosure(L, io_file_iter, n);
+  return 1;
+}
+
 /* -- I/O file methods ---------------------------------------------------- */
 /* -- I/O file methods ---------------------------------------------------- */
 
 
 #define LJLIB_MODULE_io_method
 #define LJLIB_MODULE_io_method
@@ -357,8 +365,7 @@ LJLIB_CF(io_method_setvbuf)
 LJLIB_CF(io_method_lines)
 LJLIB_CF(io_method_lines)
 {
 {
   io_tofile(L);
   io_tofile(L);
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 }
 
 
 LJLIB_CF(io_method___gc)
 LJLIB_CF(io_method___gc)
@@ -401,7 +408,7 @@ LJLIB_CF(io_open)
 
 
 LJLIB_CF(io_popen)
 LJLIB_CF(io_popen)
 {
 {
-#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   GCstr *s = lj_lib_optstr(L, 2);
   GCstr *s = lj_lib_optstr(L, 2);
   const char *mode = s ? strdata(s) : "r";
   const char *mode = s ? strdata(s) : "r";
@@ -422,7 +429,7 @@ LJLIB_CF(io_popen)
 LJLIB_CF(io_tmpfile)
 LJLIB_CF(io_tmpfile)
 {
 {
   IOFileUD *iof = io_file_new(L);
   IOFileUD *iof = io_file_new(L);
-#if LJ_TARGET_PS3
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
   iof->fp = NULL; errno = ENOSYS;
   iof->fp = NULL; errno = ENOSYS;
 #else
 #else
   iof->fp = tmpfile();
   iof->fp = tmpfile();
@@ -488,8 +495,7 @@ LJLIB_CF(io_lines)
   } else {  /* io.lines() iterates over stdin. */
   } else {  /* io.lines() iterates over stdin. */
     setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
     setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
   }
   }
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 }
 
 
 LJLIB_CF(io_type)
 LJLIB_CF(io_type)

+ 131 - 27
jni/LuaJIT-2.0.1/src/lib_jit.c → jni/LuaJIT-2.1/src/lib_jit.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** JIT library.
 ** JIT library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lib_jit_c
 #define lib_jit_c
@@ -10,13 +10,17 @@
 #include "lauxlib.h"
 #include "lauxlib.h"
 #include "lualib.h"
 #include "lualib.h"
 
 
-#include "lj_arch.h"
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_debug.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_state.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #if LJ_HASJIT
 #if LJ_HASJIT
 #include "lj_ir.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_jit.h"
@@ -24,6 +28,7 @@
 #include "lj_iropt.h"
 #include "lj_iropt.h"
 #include "lj_target.h"
 #include "lj_target.h"
 #endif
 #endif
+#include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_vmevent.h"
 #include "lj_vmevent.h"
@@ -73,7 +78,7 @@ LJLIB_CF(jit_off)
 LJLIB_CF(jit_flush)
 LJLIB_CF(jit_flush)
 {
 {
 #if LJ_HASJIT
 #if LJ_HASJIT
-  if (L->base < L->top && !tvisnil(L->base)) {
+  if (L->base < L->top && tvisnumber(L->base)) {
     int traceno = lj_lib_checkint(L, 1);
     int traceno = lj_lib_checkint(L, 1);
     luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
     luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
     return 0;
     return 0;
@@ -279,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
 /* Names of link types. ORDER LJ_TRLINK */
 /* Names of link types. ORDER LJ_TRLINK */
 static const char *const jit_trlinkname[] = {
 static const char *const jit_trlinkname[] = {
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
-  "interpreter", "return"
+  "interpreter", "return", "stitch"
 };
 };
 
 
 /* local info = jit.util.traceinfo(tr) */
 /* local info = jit.util.traceinfo(tr) */
@@ -332,6 +337,13 @@ LJLIB_CF(jit_util_tracek)
       slot = ir->op2;
       slot = ir->op2;
       ir = &T->ir[ir->op1];
       ir = &T->ir[ir->op1];
     }
     }
+#if LJ_HASFFI
+    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);
+    }
+#endif
     lj_ir_kvalue(L, L->top-2, ir);
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     if (slot == -1)
     if (slot == -1)
@@ -416,6 +428,12 @@ LJLIB_CF(jit_util_ircalladdr)
 
 
 #include "lj_libdef.h"
 #include "lj_libdef.h"
 
 
+/*
+** Opener for the jit_util function set. It is static, so it is only
+** reachable from this file; luaopen_jit hands it to lj_lib_prereg.
+** NOTE(review): the NULL name presumably means the table is created without
+** being stored under a global name; confirm against LJ_LIB_REG.
+*/
+static int luaopen_jit_util(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_util);
+  return 1;
+}
+
 /* -- jit.opt module ------------------------------------------------------ */
 /* -- jit.opt module ------------------------------------------------------ */
 
 
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -513,6 +531,104 @@ LJLIB_CF(jit_opt_start)
 
 
 #endif
 #endif
 
 
+/* -- jit.profile module -------------------------------------------------- */
+
+#if LJ_HASPROFILE
+
+#define LJLIB_MODULE_jit_profile
+
+/* Not loaded by default, use: local profile = require("jit.profile") */
+
+/* Only the addresses of these are used: as light userdata registry keys. */
+static const char KEY_PROFILE_THREAD = 't';
+static const char KEY_PROFILE_FUNC = 'f';
+
+/*
+** Profiler callback passed to luaJIT_profile_start. Looks up the Lua
+** callback function in the registry and, if it is a function, calls it on
+** thread L2 as callback(thread, samples, vmstate), where thread is L and
+** vmstate is a one-character string. Does nothing if no function is stored.
+*/
+static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+				 int vmstate)
+{
+  TValue key;
+  cTValue *tv;
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  tv = lj_tab_get(L, tabV(registry(L)), &key);
+  if (tvisfunc(tv)) {
+    char vmst = (char)vmstate;
+    int status;
+    /* Push the function and its three arguments onto L2's stack. */
+    setfuncV(L2, L2->top++, funcV(tv));
+    setthreadV(L2, L2->top++, L);
+    setintV(L2->top++, samples);
+    setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
+    status = lua_pcall(L2, 3, 0, 0);  /* callback(thread, samples, vmstate) */
+    /* An error in the callback is fatal: run the panic handler, then exit. */
+    if (status) {
+      if (G(L2)->panic) G(L2)->panic(L2);
+      exit(EXIT_FAILURE);
+    }
+    /* NOTE(review): presumably aborts any trace being recorded; confirm. */
+    lj_trace_abort(G(L2));
+  }
+}
+
+/*
+** profile.start(mode, cb)
+** mode is an optional string (an empty string is passed on if absent),
+** cb is a required function. Creates a new thread to run the callback,
+** anchors the thread and cb in the registry and starts the profiler.
+** Returns no values.
+*/
+LJLIB_CF(jit_profile_start)
+{
+  GCtab *registry = tabV(registry(L));
+  GCstr *mode = lj_lib_optstr(L, 1);
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  lua_State *L2 = lua_newthread(L);  /* Thread that runs profiler callback. */
+  TValue key;
+  /* Anchor thread and function in registry. */
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setthreadV(L, lj_tab_set(L, registry, &key), L2);
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setfuncV(L, lj_tab_set(L, registry, &key), func);
+  /* One barrier after both stores into the registry table. */
+  lj_gc_anybarriert(L, registry);
+  /* L2 is handed over as the callback's data argument. */
+  luaJIT_profile_start(L, mode ? strdata(mode) : "",
+		       (luaJIT_profile_callback)jit_profile_callback, L2);
+  return 0;
+}
+
+/*
+** profile.stop()
+** Stops the profiler first, then clears the registry entries for the
+** callback thread and the callback function. Returns no values.
+*/
+LJLIB_CF(jit_profile_stop)
+{
+  GCtab *registry;
+  TValue key;
+  luaJIT_profile_stop(L);
+  registry = tabV(registry(L));
+  /* Set both anchors to nil. */
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setnilV(lj_tab_set(L, registry, &key));
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setnilV(lj_tab_set(L, registry, &key));
+  lj_gc_anybarriert(L, registry);
+  return 0;
+}
+
+/*
+** dump = profile.dumpstack([thread,] fmt, depth)
+** If argument 1 is a thread, that thread's stack is dumped and fmt/depth
+** shift to arguments 2 and 3; otherwise the calling thread is used.
+** fmt must be a string and depth an integer. Returns 1 result: the dump
+** as a string.
+*/
+LJLIB_CF(jit_profile_dumpstack)
+{
+  lua_State *L2 = L;
+  int arg = 0;  /* Argument offset: 1 if a thread was passed, else 0. */
+  size_t len;
+  int depth;
+  GCstr *fmt;
+  const char *p;
+  if (L->top > L->base && tvisthread(L->base)) {
+    L2 = threadV(L->base);
+    arg = 1;
+  }
+  fmt = lj_lib_checkstr(L, arg+1);
+  depth = lj_lib_checkint(L, arg+2);
+  p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
+  /* The result is pushed with an explicit length, not as a C string. */
+  lua_pushlstring(L, p, len);
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+/*
+** Opener for the jit_profile function set, compiled only with LJ_HASPROFILE.
+** It is static; luaopen_jit hands it to lj_lib_prereg.
+** NOTE(review): the NULL name presumably means the table is created without
+** being stored under a global name; confirm against LJ_LIB_REG.
+*/
+static int luaopen_jit_profile(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_profile);
+  return 1;
+}
+
+#endif
+
 /* -- JIT compiler initialization ----------------------------------------- */
 /* -- JIT compiler initialization ----------------------------------------- */
 
 
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -538,23 +654,17 @@ static uint32_t jit_cpudetect(lua_State *L)
   uint32_t features[4];
   uint32_t features[4];
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
 #if !LJ_HASJIT
 #if !LJ_HASJIT
-#define JIT_F_CMOV	1
 #define JIT_F_SSE2	2
 #define JIT_F_SSE2	2
 #endif
 #endif
-    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
 #if LJ_HASJIT
 #if LJ_HASJIT
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
-      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
-	flags |= JIT_F_P4;  /* Currently unused. */
-      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+      if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
 	flags |= JIT_F_LEA_AGU;
 	flags |= JIT_F_LEA_AGU;
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
       uint32_t fam = (features[0] & 0x0ff00f00);
       uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam == 0x00000f00)  /* K8. */
-	flags |= JIT_F_SPLIT_XMM;
       if (fam >= 0x00000f00)  /* K8, K10. */
       if (fam >= 0x00000f00)  /* K8, K10. */
 	flags |= JIT_F_PREFER_IMUL;
 	flags |= JIT_F_PREFER_IMUL;
     }
     }
@@ -562,14 +672,8 @@ static uint32_t jit_cpudetect(lua_State *L)
   }
   }
   /* Check for required instruction set support on x86 (unnecessary on x64). */
   /* Check for required instruction set support on x86 (unnecessary on x64). */
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
-#if !defined(LUAJIT_CPU_NOCMOV)
-  if (!(flags & JIT_F_CMOV))
-    luaL_error(L, "CPU not supported");
-#endif
-#if defined(LUAJIT_CPU_SSE2)
   if (!(flags & JIT_F_SSE2))
   if (!(flags & JIT_F_SSE2))
-    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-#endif
+    luaL_error(L, "CPU with SSE2 required");
 #endif
 #endif
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -591,6 +695,8 @@ static uint32_t jit_cpudetect(lua_State *L)
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
 #endif
 #endif
+#elif LJ_TARGET_ARM64
+  /* No optional CPU features to detect (for now). */
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #if LJ_HASJIT
 #if LJ_HASJIT
 #if LJ_ARCH_SQRT
 #if LJ_ARCH_SQRT
@@ -600,8 +706,6 @@ static uint32_t jit_cpudetect(lua_State *L)
   flags |= JIT_F_ROUND;
   flags |= JIT_F_ROUND;
 #endif
 #endif
 #endif
 #endif
-#elif LJ_TARGET_PPCSPE
-  /* Nothing to do. */
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #if LJ_HASJIT
 #if LJ_HASJIT
   /* Compile-time MIPS CPU detection. */
   /* Compile-time MIPS CPU detection. */
@@ -631,11 +735,7 @@ static void jit_init(lua_State *L)
   uint32_t flags = jit_cpudetect(L);
   uint32_t flags = jit_cpudetect(L);
 #if LJ_HASJIT
 #if LJ_HASJIT
   jit_State *J = L2J(L);
   jit_State *J = L2J(L);
-#if LJ_TARGET_X86
-  /* Silently turn off the JIT compiler on CPUs without SSE2. */
-  if ((flags & JIT_F_SSE2))
-#endif
-    J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
   memcpy(J->param, jit_param_default, sizeof(J->param));
   memcpy(J->param, jit_param_default, sizeof(J->param));
   lj_dispatch_update(G(L));
   lj_dispatch_update(G(L));
 #else
 #else
@@ -645,19 +745,23 @@ static void jit_init(lua_State *L)
 
 
 LUALIB_API int luaopen_jit(lua_State *L)
 LUALIB_API int luaopen_jit(lua_State *L)
 {
 {
+  jit_init(L);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushliteral(L, LUAJIT_VERSION);
   lua_pushliteral(L, LUAJIT_VERSION);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#if LJ_HASPROFILE
+  lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
+		tabref(L->env));
+#endif
 #ifndef LUAJIT_DISABLE_JITUTIL
 #ifndef LUAJIT_DISABLE_JITUTIL
-  LJ_LIB_REG(L, "jit.util", jit_util);
+  lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
 #endif
 #endif
 #if LJ_HASJIT
 #if LJ_HASJIT
   LJ_LIB_REG(L, "jit.opt", jit_opt);
   LJ_LIB_REG(L, "jit.opt", jit_opt);
 #endif
 #endif
   L->top -= 2;
   L->top -= 2;
-  jit_init(L);
   return 1;
   return 1;
 }
 }
 
 

+ 4 - 7
jni/LuaJIT-2.0.1/src/lib_math.c → jni/LuaJIT-2.1/src/lib_math.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Math library.
 ** Math library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include <math.h>
 #include <math.h>
@@ -57,17 +57,14 @@ LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 #else
 #else
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
 #endif
 #endif
-    setnumV(L->base-1, x*y);  /* Do NOT join the expression to x / y. */
+    setnumV(L->base-1-LJ_FR2, x*y);  /* Do NOT join the expression to x / y. */
     return FFH_RES(1);
     return FFH_RES(1);
   }
   }
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
 
 
-LJLIB_PUSH(57.29577951308232)
-LJLIB_ASM_(math_deg)		LJLIB_REC(math_degrad)
-
-LJLIB_PUSH(0.017453292519943295)
-LJLIB_ASM_(math_rad)		LJLIB_REC(math_degrad)
+LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
+LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
 
 
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 {
 {

+ 32 - 20
jni/LuaJIT-2.0.1/src/lib_os.c → jni/LuaJIT-2.1/src/lib_os.c

@@ -1,13 +1,12 @@
 /*
 /*
 ** OS library.
 ** OS library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 */
 
 
 #include <errno.h>
 #include <errno.h>
-#include <locale.h>
 #include <time.h>
 #include <time.h>
 
 
 #define lib_os_c
 #define lib_os_c
@@ -18,7 +17,10 @@
 #include "lualib.h"
 #include "lualib.h"
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 #if LJ_TARGET_POSIX
 #if LJ_TARGET_POSIX
@@ -27,13 +29,17 @@
 #include <stdio.h>
 #include <stdio.h>
 #endif
 #endif
 
 
+#if !LJ_TARGET_PSVITA
+#include <locale.h>
+#endif
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_os
 #define LJLIB_MODULE_os
 
 
 LJLIB_CF(os_execute)
 LJLIB_CF(os_execute)
 {
 {
-#if LJ_TARGET_CONSOLE
+#if LJ_NO_SYSTEM
 #if LJ_52
 #if LJ_52
   errno = ENOSYS;
   errno = ENOSYS;
   return luaL_fileresult(L, 0, NULL);
   return luaL_fileresult(L, 0, NULL);
@@ -70,7 +76,7 @@ LJLIB_CF(os_rename)
 
 
 LJLIB_CF(os_tmpname)
 LJLIB_CF(os_tmpname)
 {
 {
-#if LJ_TARGET_PS3
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
   lj_err_caller(L, LJ_ERR_OSUNIQF);
   lj_err_caller(L, LJ_ERR_OSUNIQF);
   return 0;
   return 0;
 #else
 #else
@@ -185,7 +191,7 @@ LJLIB_CF(os_date)
 #endif
 #endif
   }
   }
   if (stm == NULL) {  /* Invalid date? */
   if (stm == NULL) {  /* Invalid date? */
-    setnilV(L->top-1);
+    setnilV(L->top++);
   } else if (strcmp(s, "*t") == 0) {
   } else if (strcmp(s, "*t") == 0) {
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     setfield(L, "sec", stm->tm_sec);
     setfield(L, "sec", stm->tm_sec);
@@ -197,23 +203,25 @@ LJLIB_CF(os_date)
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setboolfield(L, "isdst", stm->tm_isdst);
     setboolfield(L, "isdst", stm->tm_isdst);
-  } else {
-    char cc[3];
-    luaL_Buffer b;
-    cc[0] = '%'; cc[2] = '\0';
-    luaL_buffinit(L, &b);
-    for (; *s; s++) {
-      if (*s != '%' || *(s + 1) == '\0') {  /* No conversion specifier? */
-	luaL_addchar(&b, *s);
-      } else {
-	size_t reslen;
-	char buff[200];  /* Should be big enough for any conversion result. */
-	cc[1] = *(++s);
-	reslen = strftime(buff, sizeof(buff), cc, stm);
-	luaL_addlstring(&b, buff, reslen);
+  } else if (*s) {
+    SBuf *sb = &G(L)->tmpbuf;
+    MSize sz = 0;
+    const char *q;
+    for (q = s; *q; q++)
+      sz += (*q == '%') ? 30 : 1;  /* Overflow doesn't matter. */
+    setsbufL(sb, L);
+    for (;;) {
+      char *buf = lj_buf_need(sb, sz);
+      size_t len = strftime(buf, sbufsz(sb), s, stm);
+      if (len) {
+	setstrV(L, L->top++, lj_str_new(L, buf, len));
+	lj_gc_check(L);
+	break;
       }
       }
+      sz += (sz|1);
     }
     }
-    luaL_pushresult(&b);
+  } else {
+    setstrV(L, L->top++, &G(L)->strempty);
   }
   }
   return 1;
   return 1;
 }
 }
@@ -254,6 +262,9 @@ LJLIB_CF(os_difftime)
 
 
 LJLIB_CF(os_setlocale)
 LJLIB_CF(os_setlocale)
 {
 {
+#if LJ_TARGET_PSVITA
+  lua_pushliteral(L, "C");
+#else
   GCstr *s = lj_lib_optstr(L, 1);
   GCstr *s = lj_lib_optstr(L, 1);
   const char *str = s ? strdata(s) : NULL;
   const char *str = s ? strdata(s) : NULL;
   int opt = lj_lib_checkopt(L, 2, 6,
   int opt = lj_lib_checkopt(L, 2, 6,
@@ -265,6 +276,7 @@ LJLIB_CF(os_setlocale)
   else if (opt == 4) opt = LC_MONETARY;
   else if (opt == 4) opt = LC_MONETARY;
   else if (opt == 6) opt = LC_ALL;
   else if (opt == 6) opt = LC_ALL;
   lua_pushstring(L, setlocale(opt, str));
   lua_pushstring(L, setlocale(opt, str));
+#endif
   return 1;
   return 1;
 }
 }
 
 

+ 12 - 7
jni/LuaJIT-2.0.1/src/lib_package.c → jni/LuaJIT-2.1/src/lib_package.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Package library.
 ** Package library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -68,9 +68,6 @@ static const char *ll_bcsym(void *lib, const char *sym)
 #elif LJ_TARGET_WINDOWS
 #elif LJ_TARGET_WINDOWS
 
 
 #define WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
-#ifndef WINVER
-#define WINVER 0x0500
-#endif
 #include <windows.h>
 #include <windows.h>
 
 
 #ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
 #ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
@@ -99,9 +96,17 @@ static void setprogdir(lua_State *L)
 static void pusherror(lua_State *L)
 static void pusherror(lua_State *L)
 {
 {
   DWORD error = GetLastError();
   DWORD error = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuffer[128];
+  char buffer[128*2];
+  if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
+      WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
+#else
   char buffer[128];
   char buffer[128];
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
       NULL, error, 0, buffer, sizeof(buffer), NULL))
       NULL, error, 0, buffer, sizeof(buffer), NULL))
+#endif
     lua_pushstring(L, buffer);
     lua_pushstring(L, buffer);
   else
   else
     lua_pushfstring(L, "system error %d\n", error);
     lua_pushfstring(L, "system error %d\n", error);
@@ -114,7 +119,7 @@ static void ll_unloadlib(void *lib)
 
 
 static void *ll_load(lua_State *L, const char *path, int gl)
 static void *ll_load(lua_State *L, const char *path, int gl)
 {
 {
-  HINSTANCE lib = LoadLibraryA(path);
+  HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
   if (lib == NULL) pusherror(L);
   if (lib == NULL) pusherror(L);
   UNUSED(gl);
   UNUSED(gl);
   return lib;
   return lib;
@@ -229,7 +234,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       lua_pop(L, 1);
       lua_pop(L, 1);
       if (bcdata) {
       if (bcdata) {
-	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+	if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
 	  return PACKAGE_ERR_LOAD;
 	  return PACKAGE_ERR_LOAD;
 	return 0;
 	return 0;
       }
       }
@@ -386,7 +391,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
   if (lua_isnil(L, -1)) {  /* Not found? */
   if (lua_isnil(L, -1)) {  /* Not found? */
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcdata = ll_bcsym(NULL, bcname);
     const char *bcdata = ll_bcsym(NULL, bcname);
-    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
   }
   }
   return 1;
   return 1;

+ 131 - 319
jni/LuaJIT-2.0.1/src/lib_string.c → jni/LuaJIT-2.1/src/lib_string.c

@@ -1,13 +1,11 @@
 /*
 /*
 ** String library.
 ** String library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 */
 
 
-#include <stdio.h>
-
 #define lib_string_c
 #define lib_string_c
 #define LUA_LIB
 #define LUA_LIB
 
 
@@ -18,6 +16,7 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_meta.h"
@@ -25,17 +24,19 @@
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_char.h"
 #include "lj_char.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_string
 #define LJLIB_MODULE_string
 
 
-LJLIB_ASM(string_len)		LJLIB_REC(.)
-{
-  lj_lib_checkstr(L, 1);
-  return FFH_RETRY;
-}
+LJLIB_LUA(string_len) /*
+  function(s)
+    CHECK_str(s)
+    return #s
+  end
+*/
 
 
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 {
 {
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
   lj_state_checkstack(L, (MSize)n);
   lj_state_checkstack(L, (MSize)n);
   p = (const unsigned char *)strdata(s) + start;
   p = (const unsigned char *)strdata(s) + start;
   for (i = 0; i < n; i++)
   for (i = 0; i < n; i++)
-    setintV(L->base + i-1, p[i]);
+    setintV(L->base + i-1-LJ_FR2, p[i]);
   return FFH_RES(n);
   return FFH_RES(n);
 }
 }
 
 
-LJLIB_ASM(string_char)
+LJLIB_ASM(string_char)		LJLIB_REC(.)
 {
 {
   int i, nargs = (int)(L->top - L->base);
   int i, nargs = (int)(L->top - L->base);
-  char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs);
+  char *buf = lj_buf_tmp(L, (MSize)nargs);
   for (i = 1; i <= nargs; i++) {
   for (i = 1; i <= nargs; i++) {
     int32_t k = lj_lib_checkint(L, i);
     int32_t k = lj_lib_checkint(L, i);
     if (!checku8(k))
     if (!checku8(k))
       lj_err_arg(L, i, LJ_ERR_BADVAL);
       lj_err_arg(L, i, LJ_ERR_BADVAL);
     buf[i-1] = (char)k;
     buf[i-1] = (char)k;
   }
   }
-  setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+  setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub)		LJLIB_REC(string_range 1)
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
 
 
-LJLIB_ASM(string_rep)
+LJLIB_CF(string_rep)		LJLIB_REC(.)
 {
 {
   GCstr *s = lj_lib_checkstr(L, 1);
   GCstr *s = lj_lib_checkstr(L, 1);
-  int32_t k = lj_lib_checkint(L, 2);
+  int32_t rep = lj_lib_checkint(L, 2);
   GCstr *sep = lj_lib_optstr(L, 3);
   GCstr *sep = lj_lib_optstr(L, 3);
-  int32_t len = (int32_t)s->len;
-  global_State *g = G(L);
-  int64_t tlen;
-  const char *src;
-  char *buf;
-  if (k <= 0) {
-  empty:
-    setstrV(L, L->base-1, &g->strempty);
-    return FFH_RES(1);
+  SBuf *sb = lj_buf_tmp_(L);
+  if (sep && rep > 1) {
+    GCstr *s2 = lj_buf_cat2str(L, sep, s);
+    lj_buf_reset(sb);
+    lj_buf_putstr(sb, s);
+    s = s2;
+    rep--;
   }
   }
-  if (sep) {
-    tlen = (int64_t)len + sep->len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-    tlen *= k;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  } else {
-    tlen = (int64_t)k * len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  }
-  if (tlen == 0) goto empty;
-  buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
-  src = strdata(s);
-  if (sep) {
-    tlen -= sep->len;  /* Ignore trailing separator. */
-    if (k > 1) {  /* Paste one string and one separator. */
-      int32_t i;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = strdata(sep); len = sep->len;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = g->tmpbuf.buf; len += s->len; k--;  /* Now copy that k-1 times. */
-    }
-  }
-  do {
-    int32_t i = 0;
-    do { *buf++ = src[i++]; } while (i < len);
-  } while (--k > 0);
-  setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
-  return FFH_RES(1);
+  sb = lj_buf_putstr_rep(sb, s, rep);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
 }
 }
 
 
-LJLIB_ASM(string_reverse)
+LJLIB_ASM(string_reverse)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
 {
 {
-  GCstr *s = lj_lib_checkstr(L, 1);
-  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+  lj_lib_checkstr(L, 1);
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
-LJLIB_ASM_(string_lower)
-LJLIB_ASM_(string_upper)
+LJLIB_ASM_(string_lower)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
+LJLIB_ASM_(string_upper)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
 {
 {
-  luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+  lj_buf_putmem((SBuf *)sb, p, (MSize)size);
   UNUSED(L);
   UNUSED(L);
   return 0;
   return 0;
 }
 }
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
 {
 {
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
-  luaL_Buffer b;
+  SBuf *sb = lj_buf_tmp_(L);  /* Assumes lj_bcwrite() doesn't use tmpbuf. */
   L->top = L->base+1;
   L->top = L->base+1;
-  luaL_buffinit(L, &b);
-  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
     lj_err_caller(L, LJ_ERR_STRDUMP);
     lj_err_caller(L, LJ_ERR_STRDUMP);
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -183,7 +154,6 @@ typedef struct MatchState {
 } MatchState;
 } MatchState;
 
 
 #define L_ESC		'%'
 #define L_ESC		'%'
-#define SPECIALS	"^$*+?.([%-"
 
 
 static int check_capture(MatchState *ms, int l)
 static int check_capture(MatchState *ms, int l)
 {
 {
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
   return s;
   return s;
 }
 }
 
 
-static const char *lmemfind(const char *s1, size_t l1,
-			    const char *s2, size_t l2)
-{
-  if (l2 == 0) {
-    return s1;  /* empty strings are everywhere */
-  } else if (l2 > l1) {
-    return NULL;  /* avoids a negative `l1' */
-  } else {
-    const char *init;  /* to search for a `*s2' inside `s1' */
-    l2--;  /* 1st char will be checked by `memchr' */
-    l1 = l1-l2;  /* `s2' cannot be found after that */
-    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
-      init++;   /* 1st char is already checked */
-      if (memcmp(init, s2+1, l2) == 0) {
-	return init-1;
-      } else {  /* correct `l1' and `s1' to try again */
-	l1 -= (size_t)(init-s1);
-	s1 = init;
-      }
-    }
-    return NULL;  /* not found */
-  }
-}
-
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 {
 {
   if (i >= ms->level) {
   if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
   return nlevels;  /* number of strings pushed */
   return nlevels;  /* number of strings pushed */
 }
 }
 
 
-static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
-{
-  /* relative string position: negative means back from end */
-  if (pos < 0) pos += (ptrdiff_t)len + 1;
-  return (pos >= 0) ? pos : 0;
-}
-
 static int str_find_aux(lua_State *L, int find)
 static int str_find_aux(lua_State *L, int find)
 {
 {
-  size_t l1, l2;
-  const char *s = luaL_checklstring(L, 1, &l1);
-  const char *p = luaL_checklstring(L, 2, &l2);
-  ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
-  if (init < 0) {
-    init = 0;
-  } else if ((size_t)(init) > l1) {
+  GCstr *s = lj_lib_checkstr(L, 1);
+  GCstr *p = lj_lib_checkstr(L, 2);
+  int32_t start = lj_lib_optint(L, 3, 1);
+  MSize st;
+  if (start < 0) start += (int32_t)s->len; else start--;
+  if (start < 0) start = 0;
+  st = (MSize)start;
+  if (st > s->len) {
 #if LJ_52
 #if LJ_52
     setnilV(L->top-1);
     setnilV(L->top-1);
     return 1;
     return 1;
 #else
 #else
-    init = (ptrdiff_t)l1;
+    st = s->len;
 #endif
 #endif
   }
   }
-  if (find && (lua_toboolean(L, 4) ||  /* explicit request? */
-      strpbrk(p, SPECIALS) == NULL)) {  /* or no special characters? */
-    /* do a plain search */
-    const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
-    if (s2) {
-      lua_pushinteger(L, s2-s+1);
-      lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
+	       !lj_str_haspattern(p))) {  /* Search for fixed string. */
+    const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
+    if (q) {
+      setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
+      setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
       return 2;
       return 2;
     }
     }
-  } else {
+  } else {  /* Search for pattern. */
     MatchState ms;
     MatchState ms;
-    int anchor = (*p == '^') ? (p++, 1) : 0;
-    const char *s1=s+init;
+    const char *pstr = strdata(p);
+    const char *sstr = strdata(s) + st;
+    int anchor = 0;
+    if (*pstr == '^') { pstr++; anchor = 1; }
     ms.L = L;
     ms.L = L;
-    ms.src_init = s;
-    ms.src_end = s+l1;
-    do {
-      const char *res;
+    ms.src_init = strdata(s);
+    ms.src_end = strdata(s) + s->len;
+    do {  /* Loop through string and try to match the pattern. */
+      const char *q;
       ms.level = ms.depth = 0;
       ms.level = ms.depth = 0;
-      if ((res=match(&ms, s1, p)) != NULL) {
+      q = match(&ms, sstr, pstr);
+      if (q) {
 	if (find) {
 	if (find) {
-	  lua_pushinteger(L, s1-s+1);  /* start */
-	  lua_pushinteger(L, res-s);   /* end */
-	  return push_captures(&ms, NULL, 0) + 2;
+	  setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
+	  setintV(L->top++, (int32_t)(q-strdata(s)));
+	  return push_captures(&ms, NULL, NULL) + 2;
 	} else {
 	} else {
-	  return push_captures(&ms, s1, res);
+	  return push_captures(&ms, sstr, q);
 	}
 	}
       }
       }
-    } while (s1++ < ms.src_end && !anchor);
+    } while (sstr++ < ms.src_end && !anchor);
   }
   }
-  lua_pushnil(L);  /* not found */
+  setnilV(L->top-1);  /* Not found. */
   return 1;
   return 1;
 }
 }
 
 
-LJLIB_CF(string_find)
+LJLIB_CF(string_find)		LJLIB_REC(.)
 {
 {
   return str_find_aux(L, 1);
   return str_find_aux(L, 1);
 }
 }
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
-#define MAX_FMTITEM	512
-/* valid flags in a format specification */
-#define FMT_FLAGS	"-+ #0"
-/*
-** maximum size of each format specification (such as '%-099.99d')
-** (+10 accounts for %99.99x plus margin of error)
-*/
-#define MAX_FMTSPEC	(sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
-
-static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
-{
-  GCstr *str = lj_lib_checkstr(L, arg);
-  int32_t len = (int32_t)str->len;
-  const char *s = strdata(str);
-  luaL_addchar(b, '"');
-  while (len--) {
-    uint32_t c = uchar(*s);
-    if (c == '"' || c == '\\' || c == '\n') {
-      luaL_addchar(b, '\\');
-    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
-      uint32_t d;
-      luaL_addchar(b, '\\');
-      if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
-	luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
-	goto tens;
-      } else if (c >= 10) {
-      tens:
-	d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
-      }
-      c += '0';
-    }
-    luaL_addchar(b, c);
-    s++;
-  }
-  luaL_addchar(b, '"');
-}
-
-static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
-{
-  const char *p = strfrmt;
-  while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++;  /* skip flags */
-  if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
-    lj_err_caller(L, LJ_ERR_STRFMTR);
-  if (lj_char_isdigit(uchar(*p))) p++;  /* skip width */
-  if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  if (*p == '.') {
-    p++;
-    if (lj_char_isdigit(uchar(*p))) p++;  /* skip precision */
-    if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  }
-  if (lj_char_isdigit(uchar(*p)))
-    lj_err_caller(L, LJ_ERR_STRFMTW);
-  *(form++) = '%';
-  strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
-  form += p - strfrmt + 1;
-  *form = '\0';
-  return p;
-}
-
-static void addintlen(char *form)
-{
-  size_t l = strlen(form);
-  char spec = form[l - 1];
-  strcpy(form + l - 1, LUA_INTFRMLEN);
-  form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
-  form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
-}
-
-static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (LUA_INTFRM_T)intV(o);
-    else
-      return (LUA_INTFRM_T)numV(o);
-  }
-}
-
-static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (unsigned LUA_INTFRM_T)intV(o);
-    else if ((int32_t)o->u32.hi < 0)
-      return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
-    else
-      return (unsigned LUA_INTFRM_T)numV(o);
-  }
-}
-
-static GCstr *meta_tostring(lua_State *L, int arg)
+/* Emulate tostring() inline. */
+static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
 {
 {
   TValue *o = L->base+arg-1;
   TValue *o = L->base+arg-1;
   cTValue *mo;
   cTValue *mo;
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   if (LJ_LIKELY(tvisstr(o)))
   if (LJ_LIKELY(tvisstr(o)))
     return strV(o);
     return strV(o);
-  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, o);
     copyTV(L, L->top++, o);
     lua_call(L, 1, 1);
     lua_call(L, 1, 1);
-    L->top--;
-    if (tvisstr(L->top))
-      return strV(L->top);
-    o = L->base+arg-1;
-    copyTV(L, o, L->top);
-  }
-  if (tvisnumber(o)) {
-    return lj_str_fromnumber(L, o);
-  } else if (tvisnil(o)) {
-    return lj_str_newlit(L, "nil");
-  } else if (tvisfalse(o)) {
-    return lj_str_newlit(L, "false");
-  } else if (tvistrue(o)) {
-    return lj_str_newlit(L, "true");
-  } else {
-    if (tvisfunc(o) && isffunc(funcV(o)))
-      lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
-    else
-      lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
-    L->top--;
-    return strV(L->top);
+    copyTV(L, L->base+arg-1, --L->top);
+    return NULL;  /* Buffer may be overwritten, retry. */
   }
   }
-}
-
-LJLIB_CF(string_format)
-{
-  int arg = 1, top = (int)(L->top - L->base);
-  GCstr *fmt = lj_lib_checkstr(L, arg);
-  const char *strfrmt = strdata(fmt);
-  const char *strfrmt_end = strfrmt + fmt->len;
-  luaL_Buffer b;
-  luaL_buffinit(L, &b);
-  while (strfrmt < strfrmt_end) {
-    if (*strfrmt != L_ESC) {
-      luaL_addchar(&b, *strfrmt++);
-    } else if (*++strfrmt == L_ESC) {
-      luaL_addchar(&b, *strfrmt++);  /* %% */
-    } else { /* format item */
-      char form[MAX_FMTSPEC];  /* to store the format (`%...') */
-      char buff[MAX_FMTITEM];  /* to store the formatted item */
+  return lj_strfmt_obj(L, o);
+}
+
+LJLIB_CF(string_format)		LJLIB_REC(.)
+{
+  int arg, top = (int)(L->top - L->base);
+  GCstr *fmt;
+  SBuf *sb;
+  FormatState fs;
+  SFormat sf;
+  int retry = 0;
+again:
+  arg = 1;
+  sb = lj_buf_tmp_(L);
+  fmt = lj_lib_checkstr(L, arg);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
+    } else {
       if (++arg > top)
       if (++arg > top)
 	luaL_argerror(L, arg, lj_obj_typename[0]);
 	luaL_argerror(L, arg, lj_obj_typename[0]);
-      strfrmt = scanformat(L, strfrmt, form);
-      switch (*strfrmt++) {
-      case 'c':
-	sprintf(buff, form, lj_lib_checkint(L, arg));
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(L->base+arg-1)) {
+	  int32_t k = intV(L->base+arg-1);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	} else {
+	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+	}
 	break;
 	break;
-      case 'd':  case 'i':
-	addintlen(form);
-	sprintf(buff, form, num2intfrm(L, arg));
+      case STRFMT_UINT:
+	if (tvisint(L->base+arg-1))
+	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+	else
+	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
 	break;
 	break;
-      case 'o':  case 'u':  case 'x':  case 'X':
-	addintlen(form);
-	sprintf(buff, form, num2uintfrm(L, arg));
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
 	break;
 	break;
-      case 'e':  case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
-	TValue tv;
-	tv.n = lj_lib_checknum(L, arg);
-	if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
-	  /* Canonicalize output of non-finite values. */
-	  char *p, nbuf[LJ_STR_NUMBUF];
-	  size_t len = lj_str_bufnum(nbuf, &tv);
-	  if (strfrmt[-1] < 'a') {
-	    nbuf[len-3] = nbuf[len-3] - 0x20;
-	    nbuf[len-2] = nbuf[len-2] - 0x20;
-	    nbuf[len-1] = nbuf[len-1] - 0x20;
-	  }
-	  nbuf[len] = '\0';
-	  for (p = form; *p < 'A' && *p != '.'; p++) ;
-	  *p++ = 's'; *p = '\0';
-	  sprintf(buff, form, nbuf);
-	  break;
-	}
-	sprintf(buff, form, (double)tv.n);
+      case STRFMT_STR: {
+	GCstr *str = string_fmt_tostring(L, arg, retry);
+	if (str == NULL)
+	  retry = 1;
+	else if ((sf & STRFMT_T_QUOTED))
+	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
+	else
+	  lj_strfmt_putfstr(sb, sf, str);
 	break;
 	break;
 	}
 	}
-      case 'q':
-	addquoted(L, &b, arg);
-	continue;
-      case 'p':
-	lj_str_pushf(L, "%p", lua_topointer(L, arg));
-	luaL_addvalue(&b);
-	continue;
-      case 's': {
-	GCstr *str = meta_tostring(L, arg);
-	if (!strchr(form, '.') && str->len >= 100) {
-	  /* no precision and string is too long to be formatted;
-	     keep original string */
-	  setstrV(L, L->top++, str);
-	  luaL_addvalue(&b);
-	  continue;
-	}
-	sprintf(buff, form, strdata(str));
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
 	break;
 	break;
-	}
       default:
       default:
-	lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+	lua_assert(0);
 	break;
 	break;
       }
       }
-      luaL_addlstring(&b, buff, strlen(buff));
     }
     }
   }
   }
-  luaL_pushresult(&b);
+  if (retry++ == 1) goto again;
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 

+ 90 - 83
jni/LuaJIT-2.0.1/src/lib_table.c → jni/LuaJIT-2.1/src/lib_table.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Table library.
 ** Table library.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -16,57 +16,43 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_table
 #define LJLIB_MODULE_table
 
 
-LJLIB_CF(table_foreachi)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  MSize i, n = lj_tab_len(t);
-  for (i = 1; i <= n; i++) {
-    cTValue *val;
-    setfuncV(L, L->top, func);
-    setintV(L->top+1, i);
-    val = lj_tab_getint(t, (int32_t)i);
-    if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreachi) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for i=1,#t do
+      local r = f(i, t[i])
+      if r ~= nil then return r end
+    end
+  end
+*/
 
 
-LJLIB_CF(table_foreach)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  L->top = L->base+3;
-  setnilV(L->top-1);
-  while (lj_tab_next(L, t, L->top-1)) {
-    copyTV(L, L->top+2, L->top);
-    copyTV(L, L->top+1, L->top-1);
-    setfuncV(L, L->top, func);
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreach) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for k, v in PAIRS(t) do
+      local r = f(k, v)
+      if r ~= nil then return r end
+    end
+  end
+*/
 
 
-LJLIB_ASM(table_getn)		LJLIB_REC(.)
-{
-  lj_lib_checktab(L, 1);
-  return FFH_UNREACHABLE;
-}
+LJLIB_LUA(table_getn) /*
+  function(t)
+    CHECK_tab(t)
+    return #t
+  end
+*/
 
 
 LJLIB_CF(table_maxn)
 LJLIB_CF(table_maxn)
 {
 {
@@ -119,52 +105,47 @@ LJLIB_CF(table_insert)		LJLIB_REC(.)
   return 0;
   return 0;
 }
 }
 
 
-LJLIB_CF(table_remove)		LJLIB_REC(.)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  int32_t e = (int32_t)lj_tab_len(t);
-  int32_t pos = lj_lib_optint(L, 2, e);
-  if (!(1 <= pos && pos <= e))  /* Nothing to remove? */
-    return 0;
-  lua_rawgeti(L, 1, pos);  /* Get previous value. */
-  /* NOBARRIER: This just moves existing elements around. */
-  for (; pos < e; pos++) {
-    cTValue *src = lj_tab_getint(t, pos+1);
-    TValue *dst = lj_tab_setint(L, t, pos);
-    if (src) {
-      copyTV(L, dst, src);
-    } else {
-      setnilV(dst);
-    }
-  }
-  setnilV(lj_tab_setint(L, t, e));  /* Remove (last) value. */
-  return 1;  /* Return previous value. */
-}
+LJLIB_LUA(table_remove) /*
+  function(t, pos)
+    CHECK_tab(t)
+    local len = #t
+    if pos == nil then
+      if len ~= 0 then
+	local old = t[len]
+	t[len] = nil
+	return old
+      end
+    else
+      CHECK_int(pos)
+      if pos >= 1 and pos <= len then
+	local old = t[pos]
+	for i=pos+1,len do
+	  t[i-1] = t[i]
+	end
+	t[len] = nil
+	return old
+      end
+    end
+  end
+*/
 
 
-LJLIB_CF(table_concat)
+LJLIB_CF(table_concat)		LJLIB_REC(.)
 {
 {
-  luaL_Buffer b;
   GCtab *t = lj_lib_checktab(L, 1);
   GCtab *t = lj_lib_checktab(L, 1);
   GCstr *sep = lj_lib_optstr(L, 2);
   GCstr *sep = lj_lib_optstr(L, 2);
-  MSize seplen = sep ? sep->len : 0;
   int32_t i = lj_lib_optint(L, 3, 1);
   int32_t i = lj_lib_optint(L, 3, 1);
-  int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) :
-				   (int32_t)lj_tab_len(t);
-  luaL_buffinit(L, &b);
-  if (i <= e) {
-    for (;;) {
-      cTValue *o;
-      lua_rawgeti(L, 1, i);
-      o = L->top-1;
-      if (!(tvisstr(o) || tvisnumber(o)))
-	lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
-      luaL_addvalue(&b);
-      if (i++ == e) break;
-      if (seplen)
-	luaL_addlstring(&b, strdata(sep), seplen);
-    }
+  int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
+	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
+  SBuf *sb = lj_buf_tmp_(L);
+  SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+  if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
+    int32_t idx = (int32_t)(intptr_t)sbufP(sb);
+    cTValue *o = lj_tab_getint(t, idx);
+    lj_err_callerv(L, LJ_ERR_TABCAT,
+		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
   }
   }
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sbx));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -284,6 +265,30 @@ LJLIB_CF(table_pack)
 }
 }
 #endif
 #endif
 
 
+LJLIB_NOREG LJLIB_CF(table_new)		LJLIB_REC(.)
+{
+  int32_t a = lj_lib_checkint(L, 1);
+  int32_t h = lj_lib_checkint(L, 2);
+  lua_createtable(L, a, h);
+  return 1;
+}
+
+LJLIB_NOREG LJLIB_CF(table_clear)	LJLIB_REC(.)
+{
+  lj_tab_clear(lj_lib_checktab(L, 1));
+  return 0;
+}
+
+static int luaopen_table_new(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
+}
+
+static int luaopen_table_clear(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
+}
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #include "lj_libdef.h"
 #include "lj_libdef.h"
@@ -295,6 +300,8 @@ LUALIB_API int luaopen_table(lua_State *L)
   lua_getglobal(L, "unpack");
   lua_getglobal(L, "unpack");
   lua_setfield(L, -2, "unpack");
   lua_setfield(L, -2, "unpack");
 #endif
 #endif
+  lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+  lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
   return 1;
   return 1;
 }
 }
 
 

+ 15 - 0
jni/LuaJIT-2.0.1/src/lj.supp → jni/LuaJIT-2.1/src/lj.supp

@@ -24,3 +24,18 @@
    Memcheck:Cond
    Memcheck:Cond
    fun:lj_str_new
    fun:lj_str_new
 }
 }
+{
+   Optimized string compare
+   Memcheck:Addr4
+   fun:lj_str_fastcmp
+}
+{
+   Optimized string compare
+   Memcheck:Addr1
+   fun:lj_str_fastcmp
+}
+{
+   Optimized string compare
+   Memcheck:Cond
+   fun:lj_str_fastcmp
+}

+ 30 - 13
jni/LuaJIT-2.0.1/src/lj_alloc.c → jni/LuaJIT-2.1/src/lj_alloc.c

@@ -77,7 +77,7 @@
 #define WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #include <windows.h>
 
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 
 
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
@@ -174,42 +174,55 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
 #endif
 #endif
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 
 
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+#if LJ_64 && !LJ_GC64
+/* 64 bit mode with 32 bit pointers needs special support for allocating
+** memory in the lower 2GB.
+*/
 
 
-#if LJ_TARGET_LINUX
+#if defined(MAP_32BIT)
 
 
+#if defined(__sun__)
+#define MMAP_REGION_START	((uintptr_t)0x1000)
+#else
 /* Actually this only gives us max. 1GB in current Linux kernels. */
 /* Actually this only gives us max. 1GB in current Linux kernels. */
+#define MMAP_REGION_START	((uintptr_t)0)
+#endif
+
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
-  void *ptr = mmap(NULL, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+  void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
   errno = olderr;
   errno = olderr;
   return ptr;
   return ptr;
 }
 }
 
 
-#elif LJ_TARGET_OSX || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__OpenBSD__)
+#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__)
 
 
 /* OSX and FreeBSD mmap() use a naive first-fit linear search.
 /* OSX and FreeBSD mmap() use a naive first-fit linear search.
 ** That's perfect for us. Except that -pagezero_size must be set for OSX,
 ** That's perfect for us. Except that -pagezero_size must be set for OSX,
 ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
 ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
 ** to be reduced to 250MB on FreeBSD.
 ** to be reduced to 250MB on FreeBSD.
 */
 */
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__OpenBSD__)
-#include <sys/resource.h>
-#define MMAP_REGION_START	((uintptr_t)0x10000000)
-#else
+#if LJ_TARGET_OSX || defined(__DragonFly__)
 #define MMAP_REGION_START	((uintptr_t)0x10000)
 #define MMAP_REGION_START	((uintptr_t)0x10000)
+#elif LJ_TARGET_PS4
+#define MMAP_REGION_START	((uintptr_t)0x4000)
+#else
+#define MMAP_REGION_START	((uintptr_t)0x10000000)
 #endif
 #endif
 #define MMAP_REGION_END		((uintptr_t)0x80000000)
 #define MMAP_REGION_END		((uintptr_t)0x80000000)
 
 
+#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+#include <sys/resource.h>
+#endif
+
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
   /* Hint for next allocation. Doesn't need to be thread-safe. */
   /* Hint for next allocation. Doesn't need to be thread-safe. */
   static uintptr_t alloc_hint = MMAP_REGION_START;
   static uintptr_t alloc_hint = MMAP_REGION_START;
   int retry = 0;
   int retry = 0;
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
   static int rlimit_modified = 0;
   static int rlimit_modified = 0;
   if (LJ_UNLIKELY(rlimit_modified == 0)) {
   if (LJ_UNLIKELY(rlimit_modified == 0)) {
     struct rlimit rlim;
     struct rlimit rlim;
@@ -227,6 +240,10 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
       return p;
       return p;
     }
     }
     if (p != CMFAIL) munmap(p, size);
     if (p != CMFAIL) munmap(p, size);
+#if defined(__sun__) || defined(__DragonFly__)
+    alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
+    if (alloc_hint + size < MMAP_REGION_END) continue;
+#endif
     if (retry) break;
     if (retry) break;
     retry = 1;
     retry = 1;
     alloc_hint = MMAP_REGION_START;
     alloc_hint = MMAP_REGION_START;
@@ -243,7 +260,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
 
 
 #else
 #else
 
 
-/* 32 bit mode is easy. */
+/* 32 bit mode and GC64 mode are easy. */
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
@@ -279,7 +296,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE

+ 0 - 0
jni/LuaJIT-2.0.1/src/lj_alloc.h → jni/LuaJIT-2.1/src/lj_alloc.h


+ 76 - 63
jni/LuaJIT-2.0.1/src/lj_api.c → jni/LuaJIT-2.1/src/lj_api.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Public Lua/C API.
 ** Public Lua/C API.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -24,6 +24,7 @@
 #include "lj_trace.h"
 #include "lj_trace.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 
 /* -- Common helper functions --------------------------------------------- */
 /* -- Common helper functions --------------------------------------------- */
 
 
@@ -188,7 +189,7 @@ LUA_API int lua_type(lua_State *L, int idx)
   cTValue *o = index2adr(L, idx);
   cTValue *o = index2adr(L, idx);
   if (tvisnumber(o)) {
   if (tvisnumber(o)) {
     return LUA_TNUMBER;
     return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o)) {
   } else if (tvislightud(o)) {
     return LUA_TLIGHTUSERDATA;
     return LUA_TLIGHTUSERDATA;
 #endif
 #endif
@@ -268,7 +269,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     return 0;
     return 0;
   } else if (tvispri(o1)) {
   } else if (tvispri(o1)) {
     return o1 != niltv(L) && o2 != niltv(L);
     return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o1)) {
   } else if (tvislightud(o1)) {
     return o1->u64 == o2->u64;
     return o1->u64 == o2->u64;
 #endif
 #endif
@@ -283,8 +284,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     } else {
     } else {
       L->top = base+2;
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
     }
   }
   }
 }
 }
@@ -306,8 +307,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
     } else {
     } else {
       L->top = base+2;
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
     }
   }
   }
 }
 }
@@ -434,7 +435,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     if (len != NULL) *len = 0;
     if (len != NULL) *len = 0;
@@ -453,7 +454,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +476,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +508,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
   } else if (tvisudata(o)) {
   } else if (tvisudata(o)) {
     return udataV(o)->len;
     return udataV(o)->len;
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
-    GCstr *s = lj_str_fromnumber(L, o);
+    GCstr *s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
     return s->len;
     return s->len;
   } else {
   } else {
@@ -545,17 +546,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
 
 
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 {
 {
-  cTValue *o = index2adr(L, idx);
-  if (tvisudata(o))
-    return uddata(udataV(o));
-  else if (tvislightud(o))
-    return lightudV(o);
-  else if (tviscdata(o))
-    return cdataptr(cdataV(o));
-  else if (tvisgcv(o))
-    return gcV(o);
-  else
-    return NULL;
+  return lj_obj_ptr(index2adr(L, idx));
 }
 }
 
 
 /* -- Stack setters (object creation) ------------------------------------- */
 /* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +597,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
 				     va_list argp)
 				     va_list argp)
 {
 {
   lj_gc_check(L);
   lj_gc_check(L);
-  return lj_str_pushvf(L, fmt, argp);
+  return lj_strfmt_pushvf(L, fmt, argp);
 }
 }
 
 
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +606,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
   va_list argp;
   va_list argp;
   lj_gc_check(L);
   lj_gc_check(L);
   va_start(argp, fmt);
   va_start(argp, fmt);
-  ret = lj_str_pushvf(L, fmt, argp);
+  ret = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   va_end(argp);
   return ret;
   return ret;
 }
 }
@@ -649,10 +640,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 
 
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 {
 {
-  GCtab *t;
   lj_gc_check(L);
   lj_gc_check(L);
-  t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
-  settabV(L, L->top, t);
+  settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
   incr_top(L);
   incr_top(L);
 }
 }
 
 
@@ -715,8 +704,8 @@ LUA_API void lua_concat(lua_State *L, int n)
       n -= (int)(L->top - top);
       n -= (int)(L->top - top);
       L->top = top+2;
       L->top = top+2;
       lj_vm_call(L, top, 1+1);
       lj_vm_call(L, top, 1+1);
-      L->top--;
-      copyTV(L, L->top-1, L->top);
+      L->top -= 1+LJ_FR2;
+      copyTV(L, L->top-1, L->top+LJ_FR2);
     } while (--n > 0);
     } while (--n > 0);
   } else if (n == 0) {  /* Push empty string. */
   } else if (n == 0) {  /* Push empty string. */
     setstrV(L, L->top, &G(L)->strempty);
     setstrV(L, L->top, &G(L)->strempty);
@@ -735,8 +724,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
   if (v == NULL) {
   if (v == NULL) {
     L->top += 2;
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   }
   copyTV(L, L->top-1, v);
   copyTV(L, L->top-1, v);
 }
 }
@@ -751,8 +740,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
   if (v == NULL) {
   if (v == NULL) {
     L->top += 2;
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   }
   copyTV(L, L->top, v);
   copyTV(L, L->top, v);
   incr_top(L);
   incr_top(L);
@@ -893,13 +882,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
   o = lj_meta_tset(L, t, L->top-2);
   o = lj_meta_tset(L, t, L->top-2);
   if (o) {
   if (o) {
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top-1);
     L->top -= 2;
     L->top -= 2;
+    copyTV(L, o, L->top+1);
   } else {
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 3;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 3+LJ_FR2;
   }
   }
 }
 }
 
 
@@ -913,14 +903,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
   setstrV(L, &key, lj_str_newz(L, k));
   setstrV(L, &key, lj_str_newz(L, k));
   o = lj_meta_tset(L, t, &key);
   o = lj_meta_tset(L, t, &key);
   if (o) {
   if (o) {
-    L->top--;
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top);
+    copyTV(L, o, --L->top);
   } else {
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 2;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 2+LJ_FR2;
   }
   }
 }
 }
 
 
@@ -1027,11 +1017,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
 
 
 /* -- Calls --------------------------------------------------------------- */
 /* -- Calls --------------------------------------------------------------- */
 
 
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)
+{
+  TValue *o = L->top, *base = o - nargs;
+  L->top = o+1;
+  for (; o > base; o--) copyTV(L, o, o-1);
+  setnilV(o);
+  return o+1;
+}
+#else
+#define api_call_base(L, nargs)	(L->top - (nargs))
+#endif
+
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 {
 {
   api_check(L, L->status == 0 || L->status == LUA_ERRERR);
   api_check(L, L->status == 0 || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
   api_checknelems(L, nargs+1);
-  lj_vm_call(L, L->top - nargs, nresults+1);
+  lj_vm_call(L, api_call_base(L, nargs), nresults+1);
 }
 }
 
 
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1049,7 +1052,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
     api_checkvalidindex(L, o);
     api_checkvalidindex(L, o);
     ef = savestack(L, o);
     ef = savestack(L, o);
   }
   }
-  status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+  status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
   if (status) hook_restore(g, oldh);
   if (status) hook_restore(g, oldh);
   return status;
   return status;
 }
 }
@@ -1057,12 +1060,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 {
 {
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+  TValue *top = L->top;
   fn->c.f = func;
   fn->c.f = func;
-  setfuncV(L, L->top, fn);
-  setlightudV(L->top+1, checklightudptr(L, ud));
+  setfuncV(L, top++, fn);
+  if (LJ_FR2) setnilV(top++);
+  setlightudV(top++, checklightudptr(L, ud));
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
-  L->top += 2;
-  return L->top-1;  /* Now call the newly allocated C function. */
+  L->top = top;
+  return top-1;  /* Now call the newly allocated C function. */
 }
 }
 
 
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1079,10 +1084,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 {
 {
   if (luaL_getmetafield(L, idx, field)) {
   if (luaL_getmetafield(L, idx, field)) {
-    TValue *base = L->top--;
-    copyTV(L, base, index2adr(L, idx));
-    L->top = base+1;
-    lj_vm_call(L, base, 1+1);
+    TValue *top = L->top--;
+    if (LJ_FR2) setnilV(top++);
+    copyTV(L, top++, index2adr(L, idx));
+    L->top = top;
+    lj_vm_call(L, top-1, 1+1);
     return 1;
     return 1;
   }
   }
   return 0;
   return 0;
@@ -1109,12 +1115,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
     } else {  /* Yield from hook: add a pseudo-frame. */
     } else {  /* Yield from hook: add a pseudo-frame. */
       TValue *top = L->top;
       TValue *top = L->top;
       hook_leave(g);
       hook_leave(g);
-      top->u64 = cframe_multres(cf);
-      setcont(top+1, lj_cont_hook);
-      setframe_pc(top+1, cframe_pc(cf)-1);
-      setframe_gc(top+2, obj2gco(L));
-      setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
-      L->top = L->base = top+3;
+      (top++)->u64 = cframe_multres(cf);
+      setcont(top, lj_cont_hook);
+      if (LJ_FR2) top++;
+      setframe_pc(top, cframe_pc(cf)-1);
+      if (LJ_FR2) top++;
+      setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+      setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+      L->top = L->base = top+1;
 #if LJ_TARGET_X64
 #if LJ_TARGET_X64
       lj_err_throw(L, LUA_YIELD);
       lj_err_throw(L, LUA_YIELD);
 #else
 #else
@@ -1131,7 +1139,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
 LUA_API int lua_resume(lua_State *L, int nargs)
 LUA_API int lua_resume(lua_State *L, int nargs)
 {
 {
   if (L->cframe == NULL && L->status <= LUA_YIELD)
   if (L->cframe == NULL && L->status <= LUA_YIELD)
-    return lj_vm_resume(L, L->top - nargs, 0, 0);
+    return lj_vm_resume(L,
+      L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
+      0, 0);
   L->top = L->base;
   L->top = L->base;
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   incr_top(L);
   incr_top(L);
@@ -1161,10 +1171,10 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.total & 0x3ff);
     res = (int)(g->gc.total & 0x3ff);
     break;
     break;
   case LUA_GCSTEP: {
   case LUA_GCSTEP: {
-    MSize a = (MSize)data << 10;
+    GCSize a = (GCSize)data << 10;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     while (g->gc.total >= g->gc.threshold)
     while (g->gc.total >= g->gc.threshold)
-      if (lj_gc_step(L)) {
+      if (lj_gc_step(L) > 0) {
 	res = 1;
 	res = 1;
 	break;
 	break;
       }
       }
@@ -1178,6 +1188,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.stepmul);
     res = (int)(g->gc.stepmul);
     g->gc.stepmul = (MSize)data;
     g->gc.stepmul = (MSize)data;
     break;
     break;
+  case LUA_GCISRUNNING:
+    res = (g->gc.threshold != LJ_MAX_MEM);
+    break;
   default:
   default:
     res = -1;  /* Invalid option. */
     res = -1;  /* Invalid option. */
   }
   }

+ 146 - 49
jni/LuaJIT-2.0.1/src/lj_arch.h → jni/LuaJIT-2.1/src/lj_arch.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Target architecture selection.
 ** Target architecture selection.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_ARCH_H
 #ifndef _LJ_ARCH_H
@@ -19,10 +19,10 @@
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_arm		3
 #define LUAJIT_ARCH_arm		3
-#define LUAJIT_ARCH_PPC		4
-#define LUAJIT_ARCH_ppc		4
-#define LUAJIT_ARCH_PPCSPE	5
-#define LUAJIT_ARCH_ppcspe	5
+#define LUAJIT_ARCH_ARM64	4
+#define LUAJIT_ARCH_arm64	4
+#define LUAJIT_ARCH_PPC		5
+#define LUAJIT_ARCH_ppc		5
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_mips	6
 #define LUAJIT_ARCH_mips	6
 
 
@@ -43,12 +43,10 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET	LUAJIT_ARCH_PPCSPE
-#else
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
-#endif
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
 #else
 #else
@@ -66,8 +64,9 @@
 #define LUAJIT_OS	LUAJIT_OS_LINUX
 #define LUAJIT_OS	LUAJIT_OS_LINUX
 #elif defined(__MACH__) && defined(__APPLE__)
 #elif defined(__MACH__) && defined(__APPLE__)
 #define LUAJIT_OS	LUAJIT_OS_OSX
 #define LUAJIT_OS	LUAJIT_OS_OSX
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
-      defined(__NetBSD__) || defined(__OpenBSD__)
+#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+       defined(__NetBSD__) || defined(__OpenBSD__) || \
+       defined(__DragonFly__)) && !defined(__ORBIS__)
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
 #elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
 #define LUAJIT_OS	LUAJIT_OS_POSIX
 #define LUAJIT_OS	LUAJIT_OS_POSIX
@@ -95,7 +94,7 @@
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS		(LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_IOS		(LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 
 
@@ -104,11 +103,29 @@
 #define LJ_TARGET_CONSOLE	1
 #define LJ_TARGET_CONSOLE	1
 #endif
 #endif
 
 
+#ifdef __ORBIS__
+#define LJ_TARGET_PS4		1
+#define LJ_TARGET_CONSOLE	1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
+#ifdef __psp2__
+#define LJ_TARGET_PSVITA	1
+#define LJ_TARGET_CONSOLE	1
+#endif
+
 #if _XBOX_VER >= 200
 #if _XBOX_VER >= 200
 #define LJ_TARGET_XBOX360	1
 #define LJ_TARGET_XBOX360	1
 #define LJ_TARGET_CONSOLE	1
 #define LJ_TARGET_CONSOLE	1
 #endif
 #endif
 
 
+#ifdef _DURANGO
+#define LJ_TARGET_XBOXONE	1
+#define LJ_TARGET_CONSOLE	1
+#define LJ_TARGET_GC64		1
+#endif
+
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
@@ -138,7 +155,11 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#define LJ_ABI_WIN		LJ_TARGET_WINDOWS
+#if LJ_TARGET_WINDOWS || __CYGWIN__
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
+#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
 #define LJ_TARGET_EHRETREG	0
@@ -147,6 +168,9 @@
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+#ifdef LUAJIT_ENABLE_GC64
+#define LJ_TARGET_GC64		1
+#endif
 
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 
 
@@ -168,7 +192,9 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
 
-#if __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__
+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
+#define LJ_ARCH_VERSION		80
+#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
 #define LJ_ARCH_VERSION		70
 #elif __ARM_ARCH_6T2__
 #elif __ARM_ARCH_6T2__
 #define LJ_ARCH_VERSION		61
 #define LJ_ARCH_VERSION		61
@@ -178,15 +204,45 @@
 #define LJ_ARCH_VERSION		50
 #define LJ_ARCH_VERSION		50
 #endif
 #endif
 
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_NAME		"arm64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#define LJ_TARGET_ARM64		1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+#define LJ_ARCH_NOJIT		1	/* NYI */
+
+#define LJ_ARCH_VERSION		80
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
 
-#define LJ_ARCH_NAME		"ppc"
+#ifndef LJ_ARCH_ENDIAN
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+#endif
+
 #if _LP64
 #if _LP64
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_BITS		64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME		"ppc64le"
+#else
+#define LJ_ARCH_NAME		"ppc64"
+#endif
 #else
 #else
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_BITS		32
+#define LJ_ARCH_NAME		"ppc"
 #endif
 #endif
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
+
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
@@ -195,6 +251,15 @@
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 
 
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64	1
+#define LJ_ARCH_NOFFI		1
+#elif LJ_ARCH_BITS == 64
+#define LJ_ARCH_PPC64		1
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NOJIT		1	/* NYI */
+#endif
+
 #if _ARCH_PWR7
 #if _ARCH_PWR7
 #define LJ_ARCH_VERSION		70
 #define LJ_ARCH_VERSION		70
 #elif _ARCH_PWR6
 #elif _ARCH_PWR6
@@ -208,10 +273,6 @@
 #else
 #else
 #define LJ_ARCH_VERSION		0
 #define LJ_ARCH_VERSION		0
 #endif
 #endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64		1
-#define LJ_ARCH_NOFFI		1
-#endif
 #if _ARCH_PPCSQ
 #if _ARCH_PPCSQ
 #define LJ_ARCH_SQRT		1
 #define LJ_ARCH_SQRT		1
 #endif
 #endif
@@ -225,25 +286,6 @@
 #define LJ_ARCH_XENON		1
 #define LJ_ARCH_XENON		1
 #endif
 #endif
 
 
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#define LJ_ARCH_NAME		"ppcspe"
-#define LJ_ARCH_BITS		32
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP		1
-#endif
-#define LJ_ABI_EABI		1
-#define LJ_TARGET_PPCSPE	1
-#define LJ_TARGET_EHRETREG	3
-#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT	0
-#define LJ_TARGET_MASKROT	1
-#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI		1	/* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT		1
-
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
 
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -290,6 +332,16 @@
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #error "Need at least GCC 4.2 or newer"
 #error "Need at least GCC 4.2 or newer"
 #endif
 #endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
 #elif !LJ_TARGET_PS3
 #elif !LJ_TARGET_PS3
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
 #error "Need at least GCC 4.3 or newer"
@@ -313,20 +365,33 @@
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #endif
 #endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+#if defined(__AARCH64EB__)
+#error "No support for big-endian ARM64"
+#endif
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
+#endif
+#elif LJ_TARGET_PPC
 #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
 #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
 #error "No support for PowerPC CPUs without double-precision FPU"
 #error "No support for PowerPC CPUs without double-precision FPU"
 #endif
 #endif
-#if defined(_LITTLE_ENDIAN)
-#error "No support for little-endian PowerPC"
+#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
+#error "No support for little-endian PPC32"
 #endif
 #endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#if LJ_ARCH_PPC64
+#error "No support for PowerPC 64 bit mode (yet)"
+#endif
+#ifdef __NO_FPRS__
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
 #endif
 #endif
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #if defined(__mips_soft_float)
 #if defined(__mips_soft_float)
 #error "No support for MIPS CPUs without FPU"
 #error "No support for MIPS CPUs without FPU"
 #endif
 #endif
+#if defined(_LP64)
+#error "No support for MIPS64"
+#endif
 #endif
 #endif
 #endif
 #endif
 
 
@@ -351,8 +416,22 @@
 #endif
 #endif
 #endif
 #endif
 
 
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64			1
+#else
+#define LJ_GC64			0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2			1
+#else
+#define LJ_FR2			0
+#endif
+
 /* Disable or enable the JIT compiler. */
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
 #define LJ_HASJIT		0
 #define LJ_HASJIT		0
 #else
 #else
 #define LJ_HASJIT		1
 #define LJ_HASJIT		1
@@ -365,6 +444,21 @@
 #define LJ_HASFFI		1
 #define LJ_HASFFI		1
 #endif
 #endif
 
 
+#if defined(LUAJIT_DISABLE_PROFILE)
+#define LJ_HASPROFILE		0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_SIGPROF	1
+#elif LJ_TARGET_PS3
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_PTHREAD	1
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_WTHREAD	1
+#else
+#define LJ_HASPROFILE		0
+#endif
+
 #ifndef LJ_ARCH_HASFPU
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #define LJ_ARCH_HASFPU		1
 #endif
 #endif
@@ -397,15 +491,18 @@
 #define LJ_TARGET_UNALIGNED	0
 #define LJ_TARGET_UNALIGNED	0
 #endif
 #endif
 
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #define LUAJIT_NO_LOG2
 #endif
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #define LUAJIT_NO_EXP2
 #endif
 #endif
+#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
+#define LJ_NO_SYSTEM		1
+#endif
 
 
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3
+#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #define LJ_NO_UNWIND		1
 #endif
 #endif
 
 

+ 471 - 79
jni/LuaJIT-2.0.1/src/lj_asm.c → jni/LuaJIT-2.1/src/lj_asm.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** IR assembler (SSA IR -> machine code).
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_asm_c
 #define lj_asm_c
@@ -30,6 +30,10 @@
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_target.h"
 #include "lj_target.h"
 
 
+#ifdef LUA_USE_ASSERT
+#include <stdio.h>
+#endif
+
 /* -- Assembler state and common macros ----------------------------------- */
 /* -- Assembler state and common macros ----------------------------------- */
 
 
 /* Assembler state. */
 /* Assembler state. */
@@ -38,6 +42,9 @@ typedef struct ASMState {
 
 
   MCode *mcp;		/* Current MCode pointer (grows down). */
   MCode *mcp;		/* Current MCode pointer (grows down). */
   MCode *mclim;		/* Lower limit for MCode memory + red zone. */
   MCode *mclim;		/* Lower limit for MCode memory + red zone. */
+#ifdef LUA_USE_ASSERT
+  MCode *mcp_prev;	/* Red zone overflow check. */
+#endif
 
 
   IRIns *ir;		/* Copy of pointer to IR instructions/constants. */
   IRIns *ir;		/* Copy of pointer to IR instructions/constants. */
   jit_State *J;		/* JIT compiler state. */
   jit_State *J;		/* JIT compiler state. */
@@ -110,14 +117,28 @@ typedef struct ASMState {
 
 
 /* Sparse limit checks using a red zone before the actual limit. */
 /* Sparse limit checks using a red zone before the actual limit. */
 #define MCLIM_REDZONE	64
 #define MCLIM_REDZONE	64
-#define checkmclim(as) \
-  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as)
 
 
 static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
 static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
 {
 {
   lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
   lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
 }
 }
 
 
+static LJ_AINLINE void checkmclim(ASMState *as)
+{
+#ifdef LUA_USE_ASSERT
+  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
+    IRIns *ir = IR(as->curins+1);
+    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d  %02d %04d %04d\n", as->mcp,
+	    as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+    lua_assert(0);
+  }
+#endif
+  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
+#ifdef LUA_USE_ASSERT
+  as->mcp_prev = as->mcp;
+#endif
+}
+
 #ifdef RID_NUM_KREF
 #ifdef RID_NUM_KREF
 #define ra_iskref(ref)		((ref) < RID_NUM_KREF)
 #define ra_iskref(ref)		((ref) < RID_NUM_KREF)
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
@@ -158,6 +179,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #error "Missing instruction emitter for target CPU"
 #endif
 #endif
 
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 /* -- Register allocator debugging ---------------------------------------- */
 
 
 /* #define LUAJIT_DEBUG_RA */
 /* #define LUAJIT_DEBUG_RA */
@@ -315,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
     emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -332,6 +359,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
 {
   int32_t slot = ir->s;
   int32_t slot = ir->s;
+  lua_assert(ir >= as->ir + REF_TRUE);
   if (!ra_hasspill(slot)) {
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
       slot = as->evenspill;
@@ -672,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
 	emit_loadu64(as, dest, ir_kint64(ir)->u64);
 	emit_loadu64(as, dest, ir_kint64(ir)->u64);
 	return;
 	return;
 #endif
 #endif
-      } else {
+      } else if (ir->o != IR_KPRI) {
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
 		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
 	emit_loadi(as, dest, ir->i);
 	emit_loadi(as, dest, ir->i);
@@ -922,44 +950,6 @@ static void asm_snap_prep(ASMState *as)
 
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX);
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 static int32_t asm_stack_adjust(ASMState *as)
 {
 {
@@ -1044,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 }
 
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lua_assert(irt_isinteger(IR(irs->op1)->t));
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 /* -- PHI and loop handling ----------------------------------------------- */
 
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1181,6 +1424,7 @@ static void asm_phi_copyspill(ASMState *as)
 	if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
 	if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
 	  emit_spstore(as, irl, r, sps_scale(irl->s));
 	  emit_spstore(as, irl, r, sps_scale(irl->s));
 	  emit_spload(as, ir, r, sps_scale(ir->s));
 	  emit_spload(as, ir, r, sps_scale(ir->s));
+	  checkmclim(as);
 	}
 	}
       }
       }
     }
     }
@@ -1206,6 +1450,7 @@ static void asm_phi_copyspill(ASMState *as)
 	if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
 	if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
 	  emit_spstore(as, irl, r, sps_scale(irl->s));
 	  emit_spstore(as, irl, r, sps_scale(irl->s));
 	  emit_spload(as, ir, r, sps_scale(ir->s));
 	  emit_spload(as, ir, r, sps_scale(ir->s));
+	  checkmclim(as);
 	}
 	}
       }
       }
     }
     }
@@ -1223,16 +1468,18 @@ static void asm_phi_fixup(ASMState *as)
     Reg r = rset_picktop(work);
     Reg r = rset_picktop(work);
     IRRef lref = as->phireg[r];
     IRRef lref = as->phireg[r];
     IRIns *ir = IR(lref);
     IRIns *ir = IR(lref);
-    /* Left PHI gained a spill slot before the loop? */
-    if (irt_ismarked(ir->t) && ra_hasspill(ir->s)) {
-      IRRef ren;
-      lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-      ren = tref_ref(lj_ir_emit(as->J));
-      as->ir = as->T->ir;  /* The IR may have been reallocated. */
-      IR(ren)->r = (uint8_t)r;
-      IR(ren)->s = SPS_NONE;
+    if (irt_ismarked(ir->t)) {
+      irt_clearmark(ir->t);
+      /* Left PHI gained a spill slot before the loop? */
+      if (ra_hasspill(ir->s)) {
+	IRRef ren;
+	lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
+	ren = tref_ref(lj_ir_emit(as->J));
+	as->ir = as->T->ir;  /* The IR may have been reallocated. */
+	IR(ren)->r = (uint8_t)r;
+	IR(ren)->s = SPS_NONE;
+      }
     }
     }
-    irt_clearmark(ir->t);  /* Always clear marker. */
     rset_clear(work, r);
     rset_clear(work, r);
   }
   }
 }
 }
@@ -1313,6 +1560,129 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #error "Missing assembler for target CPU"
 #endif
 #endif
 
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 /* -- Head of trace ------------------------------------------------------- */
 
 
 /* Head of a root trace. */
 /* Head of a root trace. */
@@ -1347,6 +1717,11 @@ static void asm_head_side(ASMState *as)
   int pass3 = 0;
   int pass3 = 0;
   IRRef i;
   IRRef i;
 
 
+  if (as->snapno && as->topslot > as->parent->topslot) {
+    /* Force snap #0 alloc to prevent register overwrite in stack check. */
+    as->snapno = 0;
+    asm_snap_alloc(as);
+  }
   allow = asm_head_side_base(as, irp, allow);
   allow = asm_head_side_base(as, irp, allow);
 
 
   /* Scan all parent SLOADs and collect register dependencies. */
   /* Scan all parent SLOADs and collect register dependencies. */
@@ -1538,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
     mres = (int32_t)(snap->nslots - baseslot);
     mres = (int32_t)(snap->nslots - baseslot);
     switch (bc_op(*pc)) {
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1646,7 +2021,7 @@ static void asm_setup_regsp(ASMState *as)
 	as->modset |= RSET_SCRATCH;
 	as->modset |= RSET_SCRATCH;
       continue;
       continue;
       }
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
       if (inloop)
@@ -1691,10 +2066,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
       /* fallthrough */
 #endif
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+	if (ir->op2 != REF_NIL && as->evenspill < 4)
+	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
       if (inloop)
 	as->modset = RSET_SCRATCH;
 	as->modset = RSET_SCRATCH;
@@ -1703,21 +2088,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
       if (inloop)
 	as->modset = RSET_SCRATCH;
 	as->modset = RSET_SCRATCH;
       break;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+	as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
 #endif
     case IR_POW:
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-	ir->prev = REGSP_HINT(RID_XMM0);
 	if (inloop)
 	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+	  as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+	break;
 #else
 #else
 	ir->prev = REGSP_HINT(RID_FPRET);
 	ir->prev = REGSP_HINT(RID_FPRET);
-	if (inloop)
-	  as->modset |= RSET_SCRATCH;
-#endif
 	continue;
 	continue;
+#endif
       }
       }
       /* fallthrough for integer POW */
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
     case IR_DIV: case IR_MOD:
@@ -1730,26 +2120,25 @@ static void asm_setup_regsp(ASMState *as)
       break;
       break;
     case IR_FPMATH:
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-	ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-	if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+	if (!(as->flags & JIT_F_SSE4_1)) {
+	  ir->prev = REGSP_HINT(RID_XMM0);
+	  if (inloop)
+	    as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	  continue;
+	}
+	break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+	if (as->evenspill < 4)  /* Leave room to call pow(). */
 	  as->evenspill = 4;
 	  as->evenspill = 4;
-#endif
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-	continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-	ir->prev = REGSP_HINT(RID_XMM0);
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-	continue;
       }
       }
+#endif
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
       break;
 #else
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-	as->modset |= RSET_SCRATCH;
       continue;
       continue;
 #endif
 #endif
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
@@ -1822,6 +2211,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
 
 
   do {
   do {
     as->mcp = as->mctop;
     as->mcp = as->mctop;
+#ifdef LUA_USE_ASSERT
+    as->mcp_prev = as->mcp;
+#endif
     as->curins = T->nins;
     as->curins = T->nins;
     RA_DBG_START();
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
     RA_DBGX((as, "===== STOP ====="));

+ 1 - 1
jni/LuaJIT-2.0.1/src/lj_asm.h → jni/LuaJIT-2.1/src/lj_asm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** IR assembler (SSA IR -> machine code).
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_ASM_H
 #ifndef _LJ_ASM_H

+ 173 - 308
jni/LuaJIT-2.0.1/src/lj_asm_arm.h → jni/LuaJIT-2.1/src/lj_asm_arm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** ARM IR assembler (SSA IR -> machine code).
 ** ARM IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Register allocator extensions --------------------------------------- */
 /* -- Register allocator extensions --------------------------------------- */
@@ -91,6 +91,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
   *mxp++ = group*EXITSTUBS_PER_GROUP;
   *mxp++ = group*EXITSTUBS_PER_GROUP;
   for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
   for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
     *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
     *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
+  lj_mcode_sync(as->mcbot, mxp);
   lj_mcode_commitbot(as->J, mxp);
   lj_mcode_commitbot(as->J, mxp);
   as->mcbot = mxp;
   as->mcbot = mxp;
   as->mclim = as->mcbot + MCLIM_REDZONE;
   as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -337,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
   int32_t ofs = 0;
 #if LJ_SOFTFP
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
   Reg gpr = REGARG_FIRSTGPR;
@@ -452,18 +453,9 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
   UNUSED(ci);
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   CCallInfo ci;
   IRRef func;
   IRRef func;
   IRIns *irf;
   IRIns *irf;
@@ -489,9 +481,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   /* Need to force a spill on REF_BASE now to update the stack slot. */
   /* Need to force a spill on REF_BASE now to update the stack slot. */
   emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
   emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
   emit_setgl(as, base, jit_base);
   emit_setgl(as, base, jit_base);
@@ -520,13 +513,15 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 static void asm_tobit(ASMState *as, IRIns *ir)
 static void asm_tobit(ASMState *as, IRIns *ir)
 {
 {
   RegSet allow = RSET_FPR;
   RegSet allow = RSET_FPR;
-  Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, allow);
   Reg left = ra_alloc1(as, ir->op1, allow);
   Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
   Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
   Reg tmp = ra_scratch(as, rset_clear(allow, right));
   Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
 }
 }
+#else
+#define asm_tobit(as, ir)	lua_assert(0)
 #endif
 #endif
 
 
 static void asm_conv(ASMState *as, IRIns *ir)
 static void asm_conv(ASMState *as, IRIns *ir)
@@ -563,9 +558,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
       lua_assert(irt_isint(ir->t) && st == IRT_NUM);
       lua_assert(irt_isint(ir->t) && st == IRT_NUM);
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
     } else {
-      Reg dest = ra_dest(as, ir, RSET_GPR);
       Reg left = ra_alloc1(as, lref, RSET_FPR);
       Reg left = ra_alloc1(as, lref, RSET_FPR);
       Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
       Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      Reg dest = ra_dest(as, ir, RSET_GPR);
       ARMIns ai;
       ARMIns ai;
       emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
       emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
       ai = irt_isint(ir->t) ?
       ai = irt_isint(ir->t) ?
@@ -599,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -687,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -712,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       Reg src = ra_alloc1(as, ref, allow);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
     }
     }
-    if ((ir+1)->o == IR_HIOP)
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
       type = ra_alloc1(as, ref+1, allow);
       type = ra_alloc1(as, ref+1, allow);
     else
     else
       type = ra_allock(as, irt_toitype(ir->t), allow);
       type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -720,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -958,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1104,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   if (ir->r != RID_SINK) {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
     Reg src = ra_alloc1(as, ir->op2,
@@ -1114,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1209,6 +1148,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
   } else
   } else
 #endif
 #endif
   if (ra_used(ir)) {
   if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
 	       irt_isint(ir->t) || irt_isaddr(ir->t));
 	       irt_isint(ir->t) || irt_isaddr(ir->t));
     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
@@ -1216,18 +1158,15 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, allow);
     base = ra_alloc1(as, REF_BASE, allow);
     if ((ir->op2 & IRSLOAD_CONVERT)) {
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       if (t == IRT_INT) {
       if (t == IRT_INT) {
-	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
 	emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
 	emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
 	emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
-	dest = tmp;
 	t = IRT_NUM;  /* Check for original type. */
 	t = IRT_NUM;  /* Check for original type. */
       } else {
       } else {
-	Reg tmp = ra_scratch(as, RSET_GPR);
 	emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
 	emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
 	emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
 	emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
-	dest = tmp;
 	t = IRT_INT;  /* Check for original type. */
 	t = IRT_INT;  /* Check for original type. */
       }
       }
+      dest = tmp;
     }
     }
     goto dotypecheck;
     goto dotypecheck;
   }
   }
@@ -1271,19 +1210,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1305,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
       ofs -= 4; ir--;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1391,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+		RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+				   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 }
+#else
+#define asm_fpmath(as, ir)	lua_assert(0)
 #endif
 #endif
 
 
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1458,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
   asm_intop(as, ir, ai);
 }
 }
 
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1502,7 +1441,7 @@ static void asm_intmul(ASMState *as, IRIns *ir)
   if (dest == left && left != right) { left = right; right = dest; }
   if (dest == left && left != right) { left = right; right = dest; }
   if (irt_isguard(ir->t)) {  /* IR_MULOV */
   if (irt_isguard(ir->t)) {  /* IR_MULOV */
     if (!(as->flags & JIT_F_ARMV6) && dest == left)
     if (!(as->flags & JIT_F_ARMV6) && dest == left)
-      tmp = left = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left));
     asm_guardcc(as, CC_NE);
     asm_guardcc(as, CC_NE);
     emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
     emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
     emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left);
     emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left);
@@ -1549,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
   asm_intmul(as, ir);
   asm_intmul(as, ir);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_add(as, ir)
+#define asm_subov(as, ir)	asm_sub(as, ir)
+#define asm_mulov(as, ir)	asm_mul(as, ir)
+
+#if LJ_SOFTFP
+#define asm_div(as, ir)		lua_assert(0)
+#define asm_pow(as, ir)		lua_assert(0)
+#define asm_abs(as, ir)		lua_assert(0)
+#define asm_atan2(as, ir)	lua_assert(0)
+#define asm_ldexp(as, ir)	lua_assert(0)
+#else
+#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
 #if !LJ_SOFTFP
 #if !LJ_SOFTFP
@@ -1560,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
   asm_intneg(as, ir, ARMI_RSB);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 }
 
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-		RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-				   (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)	asm_bitop(as, ir, ARMI_MVN)
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1611,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_band(as, ir)	asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)		asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1628,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
   }
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)	lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
 {
   uint32_t kcmp = 0, kmov = 0;
   uint32_t kcmp = 0, kmov = 0;
@@ -1701,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
     asm_intmin_max(as, ir, cc);
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 /* Map of comparisons to flags. ORDER IR. */
 /* Map of comparisons to flags. ORDER IR. */
@@ -1816,6 +1782,19 @@ notst:
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 }
 
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
+#if LJ_HASFFI
 /* 64 bit integer comparisons. */
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
 static void asm_int64comp(ASMState *as, IRIns *ir)
 {
 {
@@ -1851,6 +1830,7 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
   }
   }
   emit_n(as, ARMI_CMP^mhi, lefthi);
   emit_n(as, ARMI_CMP^mhi, lefthi);
 }
 }
+#endif
 
 
 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
 
 
@@ -1864,11 +1844,14 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
     as->curins--;  /* Always skip the loword comparison. */
 #if LJ_SOFTFP
 #if LJ_SOFTFP
-    if (!irt_isint(ir->t))
+    if (!irt_isint(ir->t)) {
       asm_sfpcomp(as, ir-1);
       asm_sfpcomp(as, ir-1);
-    else
+      return;
+    }
+#endif
+#if LJ_HASFFI
+    asm_int64comp(as, ir-1);
 #endif
 #endif
-      asm_int64comp(as, ir-1);
     return;
     return;
 #if LJ_SOFTFP
 #if LJ_SOFTFP
   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
@@ -1885,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
     return;
   }
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1933,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1962,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
 	   (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1970,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
   }
 }
 }
 
 
@@ -2079,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
 
 
 /* -- Head of trace ------------------------------------------------------- */
 /* -- Head of trace ------------------------------------------------------- */
 
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 static void asm_head_lreg(ASMState *as)
 {
 {
   IRIns *ir = IR(ASMREF_L);
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
     ra_evictk(as);
   }
   }
 }
 }
@@ -2096,7 +2089,8 @@ static void asm_head_root_base(ASMState *as)
   IRIns *ir;
   IRIns *ir;
   asm_head_lreg(as);
   asm_head_lreg(as);
   ir = IR(REF_BASE);
   ir = IR(REF_BASE);
-  if (ra_hasreg(ir->r) && rset_test(as->modset, ir->r)) ra_spill(as, ir);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
   ra_destreg(as, ir, RID_BASE);
   ra_destreg(as, ir, RID_BASE);
 }
 }
 
 
@@ -2106,7 +2100,8 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   IRIns *ir;
   IRIns *ir;
   asm_head_lreg(as);
   asm_head_lreg(as);
   ir = IR(REF_BASE);
   ir = IR(REF_BASE);
-  if (ra_hasreg(ir->r) && rset_test(as->modset, ir->r)) ra_spill(as, ir);
+  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+    ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
   if (ra_hasspill(irp->s)) {
     rset_clear(allow, ra_dest(as, ir, allow));
     rset_clear(allow, ra_dest(as, ir, allow));
   } else {
   } else {
@@ -2154,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
   *p = 0;  /* Prevent load/store merging. */
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR:  asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {

+ 118 - 259
jni/LuaJIT-2.0.1/src/lj_asm_mips.h → jni/LuaJIT-2.1/src/lj_asm_mips.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** MIPS IR assembler (SSA IR -> machine code).
 ** MIPS IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Register allocator extensions --------------------------------------- */
 /* -- Register allocator extensions --------------------------------------- */
@@ -71,6 +71,7 @@ static void asm_sparejump_setup(ASMState *as)
     memset(mxp+2, 0, MIPS_SPAREJUMP*8);
     memset(mxp+2, 0, MIPS_SPAREJUMP*8);
     mxp += MIPS_SPAREJUMP*2;
     mxp += MIPS_SPAREJUMP*2;
     lua_assert(mxp < as->mctop);
     lua_assert(mxp < as->mctop);
+    lj_mcode_sync(as->mcbot, mxp);
     lj_mcode_commitbot(as->J, mxp);
     lj_mcode_commitbot(as->J, mxp);
     as->mcbot = mxp;
     as->mcbot = mxp;
     as->mclim = as->mcbot + MCLIM_REDZONE;
     as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -225,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 16;
   int32_t ofs = 16;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
   if ((void *)ci->func)
@@ -282,6 +283,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       else
       else
 	ofs += 4;
 	ofs += 4;
     }
     }
+    checkmclim(as);
   }
   }
 }
 }
 
 
@@ -324,18 +326,9 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
   }
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   CCallInfo ci;
   IRRef func;
   IRRef func;
   IRIns *irf;
   IRIns *irf;
@@ -360,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
   asm_gencall(as, &ci, args);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
 static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
 {
 {
   /* The modified regs must match with the *.dasc implementation. */
   /* The modified regs must match with the *.dasc implementation. */
@@ -389,9 +372,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   emit_setgl(as, base, jit_base);
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   emit_addptr(as, base, -8*delta);
   asm_guard(as, MIPSI_BNE, RID_TMP,
   asm_guard(as, MIPSI_BNE, RID_TMP,
@@ -517,28 +501,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[LJ_BE?0:1] = ir->op1;
-  args[LJ_BE?1:0] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -555,6 +517,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	   RID_SP, sps_scale(ir->s));
 	   RID_SP, sps_scale(ir->s));
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -578,27 +542,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -628,7 +571,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 **   return niltv(L);
 */
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
 {
   RegSet allow = RSET_GPR;
   RegSet allow = RSET_GPR;
   int destused = ra_used(ir);
   int destused = ra_used(ir);
@@ -654,37 +597,42 @@ static void asm_href(ASMState *as, IRIns *ir)
   tmp2 = ra_scratch(as, allow);
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
   rset_clear(allow, tmp2);
 
 
-  /* Key not found in chain: load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
   l_end = emit_label(as);
-  if (destused)
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  else
-    *--as->mcp = MIPSI_NOP;
   /* Follow hash chain until the end. */
   /* Follow hash chain until the end. */
-  emit_move(as, dest, tmp1);
+  emit_move(as, dest, tmp2);
   l_loop = --as->mcp;
   l_loop = --as->mcp;
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
   l_next = emit_label(as);
   l_next = emit_label(as);
 
 
   /* Type and value comparison. */
   /* Type and value comparison. */
+  if (merge == IR_EQ) {  /* Must match asm_guard(). */
+    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+    l_end = asm_exitstub_addr(as);
+  }
   if (irt_isnum(kt)) {
   if (irt_isnum(kt)) {
     emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
     emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
     emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
     emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
-	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
-    emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
-    emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
+    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
+    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
     emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
     emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
   } else {
   } else {
     if (irt_ispri(kt)) {
     if (irt_ispri(kt)) {
-      emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
+      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
     } else {
     } else {
-      emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
-      emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
-      emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
+      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
+      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
+      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
     }
     }
   }
   }
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
-  *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
+  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
 
 
   /* Load main position relative to tab->node into dest. */
   /* Load main position relative to tab->node into dest. */
   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -774,20 +722,6 @@ nolo:
     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  if (ir->r != RID_SINK) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-    IRRef args[3];
-    args[0] = ASMREF_L;     /* lua_State *L */
-    args[1] = ir->op1;      /* GCtab *t     */
-    args[2] = ASMREF_TMP1;  /* cTValue *key */
-    asm_setupresult(as, ir, ci);  /* TValue * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -916,7 +850,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   if (ir->r != RID_SINK) {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
     Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -925,6 +859,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   IRType1 t = ir->t;
   IRType1 t = ir->t;
@@ -1000,7 +936,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
       if (irt_isint(t)) {
       if (irt_isint(t)) {
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
-	emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+	emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
 	dest = tmp;
 	dest = tmp;
 	t.irt = IRT_NUM;  /* Check for original type. */
 	t.irt = IRT_NUM;  /* Check for original type. */
       } else {
       } else {
@@ -1040,19 +976,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1061,6 +993,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
 
   /* Initialize immutable cdata object. */
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
     if (sz == 8) {
@@ -1075,12 +1008,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; if (LJ_BE) ir++; else ir--;
       ofs -= 4; if (LJ_BE) ir++; else ir--;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1150,23 +1095,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
   emit_fg(as, mi, dest, left);
   emit_fg(as, mi, dest, left);
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)  /* Assemble an FPMATH instruction; ir->op2 selects the math operation. */
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))  /* Try to fuse EXP2 with its operands into one pow call. */
+    return;  /* NOTE(review): nonzero result presumably means the fused call was already emitted -- confirm. */
+  if (ir->op2 <= IRFPM_TRUNC)  /* Ops up to and including TRUNC go through the rounding-call helper. */
+    asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);  /* Assumes call IDs follow the IRFPM_* order -- TODO confirm. */
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, MIPSI_SQRT_D);  /* Square root is a single FP instruction. */
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);  /* Everything else becomes a call selected by the same ID offset. */
 }
 }
 
 
 static void asm_add(ASMState *as, IRIns *ir)
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1212,6 +1150,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_div(as, ir)		asm_fparith(as, ir, MIPSI_DIV_D)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
@@ -1223,6 +1165,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, MIPSI_ABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir)
 static void asm_arithov(ASMState *as, IRIns *ir)
 {
 {
   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1256,13 +1202,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
     emit_move(as, RID_TMP, dest == left ? left : right);
     emit_move(as, RID_TMP, dest == left ? left : right);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_arithov(as, ir)
+#define asm_subov(as, ir)	asm_arithov(as, ir)
+
 static void asm_mulov(ASMState *as, IRIns *ir)  /* Integer multiply with overflow guard. */
 static void asm_mulov(ASMState *as, IRIns *ir)
 {
 {
-#if LJ_DUALNUM
-#error "NYI: MULOV"
-#else
-  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused in single-number mode. */
-#endif
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);  /* Both operand registers come back packed in one value. */
+  right = (left >> 8); left &= 255;  /* Unpack: low 8 bits = left operand, bits above = right operand. */
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+						 right), dest));  /* Scratch GPR distinct from left, right and dest. */
+  asm_guard(as, MIPSI_BNE, RID_TMP, tmp);  /* Guard compares sign-fill of the low word against the high word. */
+  emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);  /* RID_TMP = dest >> 31 (arithmetic): all sign bits of the low word. */
+  emit_dst(as, MIPSI_MFHI, tmp, 0, 0);  /* tmp = high word of the product. */
+  emit_dst(as, MIPSI_MFLO, dest, 0, 0);  /* dest = low word of the product. */
+  emit_dst(as, MIPSI_MULT, 0, left, right);  /* NOTE(review): code appears to be emitted backwards (cf. `*--as->mcp` in this file), so MULT would execute first -- confirm. */
 }
 }
 
 
 #if LJ_HASFFI
 #if LJ_HASFFI
@@ -1349,7 +1303,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 }
 #endif
 #endif
 
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
 {
   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irl = IR(ir->op1);
   IRIns *irl = IR(ir->op1);
@@ -1363,7 +1317,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
   emit_dst(as, MIPSI_NOR, dest, left, right);
   emit_dst(as, MIPSI_NOR, dest, left, right);
 }
 }
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1399,6 +1353,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   emit_dst(as, mi, dest, left, right);
   emit_dst(as, mi, dest, left, right);
 }
 }
 
 
+#define asm_band(as, ir)	asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
+#define asm_bor(as, ir)		asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1412,7 +1370,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   }
   }
 }
 }
 
 
-static void asm_bitror(ASMState *as, IRIns *ir)
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)  /* Shift left: variable-count and immediate-count forms. */
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)  /* Logical shift right. */
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)  /* Arithmetic shift right. */
+#define asm_brol(as, ir)	lua_assert(0)  /* No rotate-left code is generated here; reaching this only asserts. */
+
+static void asm_bror(ASMState *as, IRIns *ir)
 {
 {
   if ((as->flags & JIT_F_MIPS32R2)) {
   if ((as->flags & JIT_F_MIPS32R2)) {
     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1461,6 +1424,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
   }
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 static void asm_comp(ASMState *as, IRIns *ir)
 static void asm_comp(ASMState *as, IRIns *ir)
@@ -1498,7 +1464,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-static void asm_compeq(ASMState *as, IRIns *ir)
+static void asm_equal(ASMState *as, IRIns *ir)
 {
 {
   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
   right = (left >> 8); left &= 255;
   right = (left >> 8); left &= 255;
@@ -1572,8 +1538,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, LJ_LE ? 4 : 0);
-      asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+      asm_xstore_(as, ir, LJ_LE ? 4 : 0);
+      asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
     }
     }
     return;
     return;
   }
   }
@@ -1597,6 +1563,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)  /* Emit a guard on the HOOK_PROFILE bit of global_State.hookmask. */
+{
+  UNUSED(ir);  /* The IR instruction itself is not inspected. */
+  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);  /* Guard branch tests RID_TMP != 0, i.e. the profile bit is set. */
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);  /* Mask RID_TMP down to the HOOK_PROFILE bit. */
+  emit_lsglptr(as, MIPSI_LBU, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));  /* Unsigned byte load of hookmask into RID_TMP. */
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1621,7 +1598,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
 }
 }
@@ -1721,7 +1698,7 @@ static void asm_head_root_base(ASMState *as)
   if (as->loopinv) as->mctop--;
   if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
   if (ra_hasreg(r)) {
     ra_free(as, r);
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
     if (r != RID_BASE)
       emit_move(as, r, RID_BASE);
       emit_move(as, r, RID_BASE);
@@ -1736,7 +1713,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (as->loopinv) as->mctop--;
   if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
   if (ra_hasreg(r)) {
     ra_free(as, r);
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
@@ -1769,131 +1746,13 @@ static void asm_tail_prep(ASMState *as)
   as->invmcp = as->loopref ? as->mcp : NULL;
   as->invmcp = as->loopref ? as->mcp : NULL;
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
-  case IR_BOR:  asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
-  case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
-  case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
-  case IR_BROL: lua_assert(0); break;
-  case IR_BROR: asm_bitror(as, ir); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, MIPSI_SQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir); break;
-  case IR_SUBOV: asm_arithov(as, ir); break;
-  case IR_MULOV: asm_mulov(as, ir); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {

+ 119 - 269
jni/LuaJIT-2.0.1/src/lj_asm_ppc.h → jni/LuaJIT-2.1/src/lj_asm_ppc.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** PPC IR assembler (SSA IR -> machine code).
 ** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Register allocator extensions --------------------------------------- */
 /* -- Register allocator extensions --------------------------------------- */
@@ -49,6 +49,8 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
 {
 {
   ExitNo i;
   ExitNo i;
   MCode *mxp = as->mctop;
   MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
   /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
   /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
   for (i = nexits-1; (int32_t)i >= 0; i--)
   for (i = nexits-1; (int32_t)i >= 0; i--)
     *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
     *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
@@ -249,7 +251,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
   int32_t ofs = 8;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
   if ((void *)ci->func)
@@ -286,6 +288,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       else
       else
 	ofs += 4;
 	ofs += 4;
     }
     }
+    checkmclim(as);
   }
   }
   if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
   if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
     emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
     emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
@@ -320,26 +323,19 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
       } else {
 	ra_destreg(as, ir, RID_FPRET);
 	ra_destreg(as, ir, RID_FPRET);
       }
       }
+#if LJ_32
     } else if (hiop) {
     } else if (hiop) {
       ra_destpair(as, ir);
       ra_destpair(as, ir);
+#endif
     } else {
     } else {
       ra_destreg(as, ir, RID_RET);
       ra_destreg(as, ir, RID_RET);
     }
     }
   }
   }
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   CCallInfo ci;
   IRRef func;
   IRRef func;
   IRIns *irf;
   IRIns *irf;
@@ -349,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
   func = ir->op2; irf = IR(func);
   func = ir->op2; irf = IR(func);
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irref_isk(func)) {  /* Call to constant address. */
   if (irref_isk(func)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(irf->i);
+    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
   } else {  /* Need a non-argument register for indirect calls. */
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     Reg freg = ra_alloc1(as, func, allow);
     Reg freg = ra_alloc1(as, func, allow);
@@ -360,16 +356,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
   asm_gencall(as, &ci, args);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 /* -- Returns ------------------------------------------------------------- */
 
 
 /* Return to lower frame. Guard that it goes to the right spot. */
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -377,9 +363,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   emit_setgl(as, base, jit_base);
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
   asm_guardcc(as, CC_NE);
@@ -509,28 +496,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -549,6 +514,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -562,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     /* Otherwise use g->tmptv to hold the TValue. */
     /* Otherwise use g->tmptv to hold the TValue. */
     RegSet allow = rset_exclude(RSET_GPR, dest);
     RegSet allow = rset_exclude(RSET_GPR, dest);
     Reg type;
     Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
     if (!irt_ispri(ir->t)) {
     if (!irt_ispri(ir->t)) {
       Reg src = ra_alloc1(as, ref, allow);
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
       emit_setgl(as, src, tmptv.gcr);
@@ -572,27 +539,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -769,20 +715,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -913,7 +845,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   IRIns *irb;
   IRIns *irb;
   if (ir->r == RID_SINK)
   if (ir->r == RID_SINK)
@@ -930,6 +862,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   IRType1 t = ir->t;
   IRType1 t = ir->t;
@@ -1080,19 +1014,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1101,6 +1031,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
 
   /* Initialize immutable cdata object. */
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
     if (sz == 8) {
@@ -1114,12 +1045,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir++;
       ofs -= 4; ir++;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1193,23 +1136,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
   emit_fb(as, pi, dest, left);
   emit_fb(as, pi, dest, left);
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+    asm_fpunary(as, ir, PPCI_FSQRT);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 }
 
 
 static void asm_add(ASMState *as, IRIns *ir)
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1309,6 +1243,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
@@ -1327,6 +1265,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {
 {
   Reg dest, left, right;
   Reg dest, left, right;
@@ -1342,6 +1284,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
+#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
+#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)
+
 #if LJ_HASFFI
 #if LJ_HASFFI
 static void asm_add64(ASMState *as, IRIns *ir)
 static void asm_add64(ASMState *as, IRIns *ir)
 {
 {
@@ -1421,7 +1367,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 }
 #endif
 #endif
 
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
 {
   Reg dest, left, right;
   Reg dest, left, right;
   PPCIns pi = PPCI_NOR;
   PPCIns pi = PPCI_NOR;
@@ -1448,7 +1394,7 @@ nofuse:
   emit_asb(as, pi, dest, left, right);
   emit_asb(as, pi, dest, left, right);
 }
 }
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irx;
   IRIns *irx;
@@ -1469,32 +1415,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-  if (irref_isk(ir->op2)) {
-    int32_t k = IR(ir->op2)->i;
-    Reg tmp = left;
-    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
-      if (!checku16(k)) {
-	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
-	if ((k & 0xffff) == 0) return;
-      }
-      emit_asi(as, pik, dest, left, k);
-      return;
-    }
-  }
-  /* May fail due to spills/restores above, but simplifies the logic. */
-  if (as->flagmcp == as->mcp) {
-    as->flagmcp = NULL;
-    as->mcp++;
-    pi |= PPCF_DOT;
-  }
-  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
-  emit_asb(as, pi, dest, left, right);
-}
-
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 {
 {
@@ -1525,7 +1445,7 @@ nofuse:
   *--as->mcp = pi | PPCF_T(left);
   *--as->mcp = pi | PPCF_T(left);
 }
 }
 
 
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
 {
 {
   Reg dest, left, right;
   Reg dest, left, right;
   IRRef lref = ir->op1;
   IRRef lref = ir->op1;
@@ -1580,6 +1500,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
 }
 }
 
 
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+      if (!checku16(k)) {
+	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+	if ((k & 0xffff) == 0) return;
+      }
+      emit_asi(as, pik, dest, left, k);
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 {
 {
   Reg dest, left;
   Reg dest, left;
@@ -1605,6 +1554,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
   }
   }
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
+#define asm_brol(as, ir) \
+  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+#define asm_bror(as, ir)	lua_assert(0)
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
 {
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
@@ -1635,6 +1592,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
   }
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
@@ -1711,6 +1671,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1756,8 +1718,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, 0);
-      asm_xstore(as, ir-1, 4);
+      asm_xstore_(as, ir, 0);
+      asm_xstore_(as, ir-1, 4);
     }
     }
     return;
     return;
   }
   }
@@ -1781,6 +1743,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1802,7 +1775,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
 }
 }
@@ -1900,7 +1873,7 @@ static void asm_head_root_base(ASMState *as)
   Reg r = ir->r;
   Reg r = ir->r;
   if (ra_hasreg(r)) {
   if (ra_hasreg(r)) {
     ra_free(as, r);
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
     if (r != RID_BASE)
       emit_mr(as, r, RID_BASE);
       emit_mr(as, r, RID_BASE);
@@ -1914,7 +1887,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   Reg r = ir->r;
   Reg r = ir->r;
   if (ra_hasreg(r)) {
   if (ra_hasreg(r)) {
     ra_free(as, r);
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
@@ -1963,136 +1936,13 @@ static void asm_tail_prep(ASMState *as)
   }
   }
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitand(as, ir); break;
-  case IR_BOR:  asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
-  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
-  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
-  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
-			     PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
-  case IR_BROR: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
-      asm_fpunary(as, ir, PPCI_FSQRT);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
-  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
-  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)
   for (i = 0; i < nargs; i++)

+ 227 - 389
jni/LuaJIT-2.0.1/src/lj_asm_x86.h → jni/LuaJIT-2.1/src/lj_asm_x86.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** x86/x64 IR assembler (SSA IR -> machine code).
 ** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Guard handling ------------------------------------------------------ */
 /* -- Guard handling ------------------------------------------------------ */
@@ -325,6 +325,14 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       as->mrm.base = as->mrm.idx = RID_NONE;
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
       return RID_MRM;
     }
     }
+  } else if (ir->o == IR_KINT64) {
+    RegSet avail = as->freeset & ~as->modset & RSET_GPR;
+    lua_assert(allow != RSET_EMPTY);
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
+      as->mrm.ofs = ptr2addr(ir_kint64(ir));
+      as->mrm.base = as->mrm.idx = RID_NONE;
+      return RID_MRM;
+    }
   } else if (mayfuse(as, ref)) {
   } else if (mayfuse(as, ref)) {
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
     if (ir->o == IR_SLOAD) {
@@ -361,7 +369,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
       return RID_MRM;
     }
     }
   }
   }
-  if (!(as->freeset & allow) &&
+  if (!(as->freeset & allow) && !irref_isk(ref) &&
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
     goto fusespill;
     goto fusespill;
   return ra_allocref(as, ref, allow);
   return ra_allocref(as, ref, allow);
@@ -384,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
   int nslots = 0;
 #if LJ_64
 #if LJ_64
   if (LJ_ABI_WIN) {
   if (LJ_ABI_WIN) {
@@ -417,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
   uint32_t gprs = REGARG_GPRS;
@@ -512,6 +520,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       }
       }
       ofs += sizeof(intptr_t);
       ofs += sizeof(intptr_t);
     }
     }
+    checkmclim(as);
   }
   }
 #if LJ_64 && !LJ_ABI_WIN
 #if LJ_64 && !LJ_ABI_WIN
   if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
   if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
@@ -551,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       if (ra_hasreg(dest)) {
       if (ra_hasreg(dest)) {
 	ra_free(as, dest);
 	ra_free(as, dest);
 	ra_modified(as, dest);
 	ra_modified(as, dest);
-	emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
 		  dest, RID_ESP, ofs);
 		  dest, RID_ESP, ofs);
       }
       }
       if ((ci->flags & CCI_CASTU64)) {
       if ((ci->flags & CCI_CASTU64)) {
@@ -570,20 +579,11 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       lua_assert(!irt_ispri(ir->t));
       lua_assert(!irt_ispri(ir->t));
       ra_destreg(as, ir, RID_RET);
       ra_destreg(as, ir, RID_RET);
     }
     }
-  } else if (LJ_32 && irt_isfp(ir->t)) {
+  } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
     emit_x87op(as, XI_FPOP);  /* Pop unused result from x87 st0. */
     emit_x87op(as, XI_FPOP);  /* Pop unused result from x87 st0. */
   }
   }
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
 {
@@ -608,7 +608,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 
 
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   CCallInfo ci;
   CCallInfo ci;
   IRRef func;
   IRRef func;
   IRIns *irf;
   IRIns *irf;
@@ -643,9 +643,10 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   emit_setgl(as, base, jit_base);
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
   asm_guardcc(as, CC_NE);
@@ -662,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
   /* Can't fuse since left is needed twice. */
 }
 }
@@ -719,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
       /* Checked conversions are only supported from number to int. */
@@ -728,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -824,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_free(as, dest);
     ra_modified(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-	      dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -853,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -896,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
 	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	   asm_fuseload(as, ir->op1, RSET_EMPTY));
 	   asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 #endif
 
 
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -917,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	    RID_ESP, sps_scale(ir->s));
 	    RID_ESP, sps_scale(ir->s));
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-	      RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
   }
 }
 }
 
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -950,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
     emit_rr(as, XO_MOV, dest, as->mrm.base);
     emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 }
 
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
 **   Node *n = hashkey(t, key);
@@ -975,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 **   return niltv(L);
 */
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -995,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
       tmp = ra_scratch(as, rset_exclude(allow, key));
       tmp = ra_scratch(as, rset_exclude(allow, key));
   }
   }
 
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
 
   /* Follow hash chain until the end. */
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1011,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
   l_next = emit_label(as);
 
 
   /* Type and value comparison. */
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
   else
     emit_sjcc(as, CC_E, l_end);
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
   if (irt_isnum(kt)) {
@@ -1168,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1262,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
   default:
     if (LJ_64 && irt_is64(ir->t))
     if (LJ_64 && irt_is64(ir->t))
@@ -1275,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
   emit_mrm(as, xo, dest, RID_MRM);
 }
 }
 
 
+#define asm_fload(as, ir)	asm_fxload(as, ir)
+#define asm_xload(as, ir)	asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
 {
   RegSet allow = RSET_GPR;
   RegSet allow = RSET_GPR;
@@ -1338,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_fstore(as, ir)	asm_fxstore(as, ir)
+#define asm_xstore(as, ir)	asm_fxstore(as, ir)
+
 #if LJ_64
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
 {
@@ -1376,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
   }
@@ -1442,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
 #if LJ_64
   } else if (irt_islightud(t)) {
   } else if (irt_islightud(t)) {
@@ -1460,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
     }
   } else {
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1491,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
 
@@ -1542,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
     } while (1);
 #endif
 #endif
     lua_assert(sz == 4 || sz == 8);
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
 
 
   /* Combine initialization of marked, gct and ctypeid. */
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+	   (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
 }
@@ -1628,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
   }
 }
 }
 
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-	rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1688,53 +1626,31 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
       ra_left(as, RID_XMM0, ir->op1);
     }
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-	emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-	emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
   }
 }
 }
 
 
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
+  asm_x87load(as, ir->op1);
+  asm_x87load(as, ir->op2);
+}
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
 {
   /* The modified regs must match with the *.dasc implementation. */
   /* The modified regs must match with the *.dasc implementation. */
@@ -1748,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
   ra_left(as, RID_EAX, ir->op2);
 }
 }
 
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
 #endif
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 }
 
 
 static int asm_swapops(ASMState *as, IRIns *ir)
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1826,8 +1731,12 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   Reg dest, right;
   Reg dest, right;
   int32_t k = 0;
   int32_t k = 0;
   if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
   if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
-    as->flagmcp = NULL;
-    as->mcp += (LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2;
+    MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
+    if ((p[1] & 15) < 14) {
+      if ((p[1] & 15) >= 12) p[1] -= 4;  /* L <->S, NL <-> NS */
+      as->flagmcp = NULL;
+      as->mcp = p;
+    }  /* else: cannot transform LE/NLE to cc without use of OF. */
   }
   }
   right = IR(rref)->r;
   right = IR(rref)->r;
   if (ra_hasreg(right)) {
   if (ra_hasreg(right)) {
@@ -1946,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
     asm_intarith(as, ir, XOg_ADD);
 }
 }
 
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1953,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
   ra_left(as, dest, ir->op1);
 }
 }
 
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)		asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1964,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
   ra_left(as, dest, lref);
 }
 }
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)	asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)	asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)	asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)	asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1972,6 +1952,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
   ra_left(as, dest, ir->op1);
 }
 }
 
 
+#define asm_band(as, ir)	asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)		asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)	asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
 {
   IRRef rref = ir->op2;
   IRRef rref = ir->op2;
@@ -2011,6 +1995,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
   */
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir)	asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 /* Virtual flags for unordered FP comparisons. */
 /* Virtual flags for unordered FP comparisons. */
@@ -2037,8 +2027,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 };
 
 
 /* FP and integer comparisons. */
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
     IRRef rref = ir->op2;
@@ -2193,6 +2184,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
   }
 }
 }
 
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2211,6 +2204,7 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
     lefthi = asm_fuseload(as, ir->op1, allow);
     lefthi = asm_fuseload(as, ir->op1, allow);
   } else {
   } else {
     lefthi = ra_alloc1(as, ir->op1, allow);
     lefthi = ra_alloc1(as, ir->op1, allow);
+    rset_clear(allow, lefthi);
     righthi = asm_fuseload(as, ir->op2, allow);
     righthi = asm_fuseload(as, ir->op2, allow);
     if (righthi == RID_MRM) {
     if (righthi == RID_MRM) {
       if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
       if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
@@ -2226,13 +2220,8 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
     leftlo = asm_fuseload(as, (ir-1)->op1, allow);
     leftlo = asm_fuseload(as, (ir-1)->op1, allow);
   } else {
   } else {
     leftlo = ra_alloc1(as, (ir-1)->op1, allow);
     leftlo = ra_alloc1(as, (ir-1)->op1, allow);
+    rset_clear(allow, leftlo);
     rightlo = asm_fuseload(as, (ir-1)->op2, allow);
     rightlo = asm_fuseload(as, (ir-1)->op2, allow);
-    if (rightlo == RID_MRM) {
-      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
-      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
-    } else {
-      rset_clear(allow, rightlo);
-    }
   }
   }
 
 
   /* All register allocations must be performed _before_ this point. */
   /* All register allocations must be performed _before_ this point. */
@@ -2279,13 +2268,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-	asm_conv_fp_int64(as, ir);
-      else
-	asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
     asm_comp_int64(as, ir);
@@ -2334,6 +2319,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2355,7 +2350,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	      ptr2addr(&J2G(as->J)->jit_base));
 	      ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
 }
@@ -2484,7 +2479,7 @@ static void asm_head_root_base(ASMState *as)
   Reg r = ir->r;
   Reg r = ir->r;
   if (ra_hasreg(r)) {
   if (ra_hasreg(r)) {
     ra_free(as, r);
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
     if (r != RID_BASE)
       emit_rr(as, XO_MOV, r, RID_BASE);
       emit_rr(as, XO_MOV, r, RID_BASE);
@@ -2498,7 +2493,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   Reg r = ir->r;
   Reg r = ir->r;
   if (ra_hasreg(r)) {
   if (ra_hasreg(r)) {
     ra_free(as, r);
     ra_free(as, r);
-    if (rset_test(as->modset, r))
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
@@ -2583,169 +2578,12 @@ static void asm_tail_prep(ASMState *as)
   }
   }
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR:  asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
-  IRRef args[CCI_NARGS_MAX];
+  IRRef args[CCI_NARGS_MAX*2];
   int nslots;
   int nslots;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   nslots = asm_count_call_slots(as, ci, args);
   nslots = asm_count_call_slots(as, ci, args);

+ 1 - 1
jni/LuaJIT-2.0.1/src/lj_bc.c → jni/LuaJIT-2.1/src/lj_bc.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bytecode instruction modes.
 ** Bytecode instruction modes.
-** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_bc_c
 #define lj_bc_c

Some files were not shown because too many files changed in this diff