2
0
Эх сурвалжийг харах

Merge commit '643fa4aad86bba3567b3b7441e36c2cbd438d79c' into master

Yao Wei Tjong 姚伟忠 10 жил өмнө
parent
commit
b36135489f
100 өөрчлөгдсөн 8043 нэмэгдсэн , 3145 устгасан
  1. 1 1
      Source/ThirdParty/LuaJIT/COPYRIGHT
  2. 3 3
      Source/ThirdParty/LuaJIT/README
  3. 1 1
      Source/ThirdParty/LuaJIT/dynasm/dasm_arm.h
  4. 9 6
      Source/ThirdParty/LuaJIT/dynasm/dasm_arm.lua
  5. 518 0
      Source/ThirdParty/LuaJIT/dynasm/dasm_arm64.h
  6. 1166 0
      Source/ThirdParty/LuaJIT/dynasm/dasm_arm64.lua
  7. 1 1
      Source/ThirdParty/LuaJIT/dynasm/dasm_mips.h
  8. 4 4
      Source/ThirdParty/LuaJIT/dynasm/dasm_mips.lua
  9. 11 4
      Source/ThirdParty/LuaJIT/dynasm/dasm_ppc.h
  10. 687 17
      Source/ThirdParty/LuaJIT/dynasm/dasm_ppc.lua
  11. 3 3
      Source/ThirdParty/LuaJIT/dynasm/dasm_proto.h
  12. 1 1
      Source/ThirdParty/LuaJIT/dynasm/dasm_x64.lua
  13. 34 7
      Source/ThirdParty/LuaJIT/dynasm/dasm_x86.h
  14. 433 100
      Source/ThirdParty/LuaJIT/dynasm/dasm_x86.lua
  15. 5 5
      Source/ThirdParty/LuaJIT/dynasm/dynasm.lua
  16. 14 12
      Source/ThirdParty/LuaJIT/src/host/buildvm.c
  17. 2 1
      Source/ThirdParty/LuaJIT/src/host/buildvm.h
  18. 52 11
      Source/ThirdParty/LuaJIT/src/host/buildvm_asm.c
  19. 1 1
      Source/ThirdParty/LuaJIT/src/host/buildvm_fold.c
  20. 61 2
      Source/ThirdParty/LuaJIT/src/host/buildvm_lib.c
  21. 45 0
      Source/ThirdParty/LuaJIT/src/host/buildvm_libbc.h
  22. 1 1
      Source/ThirdParty/LuaJIT/src/host/buildvm_peobj.c
  23. 197 0
      Source/ThirdParty/LuaJIT/src/host/genlibbc.lua
  24. 1 1
      Source/ThirdParty/LuaJIT/src/host/genminilua.lua
  25. 10 11
      Source/ThirdParty/LuaJIT/src/jit/bc.lua
  26. 16 14
      Source/ThirdParty/LuaJIT/src/jit/bcsave.lua
  27. 10 10
      Source/ThirdParty/LuaJIT/src/jit/dis_arm.lua
  28. 16 16
      Source/ThirdParty/LuaJIT/src/jit/dis_mips.lua
  29. 7 10
      Source/ThirdParty/LuaJIT/src/jit/dis_mipsel.lua
  30. 10 10
      Source/ThirdParty/LuaJIT/src/jit/dis_ppc.lua
  31. 7 10
      Source/ThirdParty/LuaJIT/src/jit/dis_x64.lua
  32. 183 91
      Source/ThirdParty/LuaJIT/src/jit/dis_x86.lua
  33. 20 12
      Source/ThirdParty/LuaJIT/src/jit/dump.lua
  34. 310 0
      Source/ThirdParty/LuaJIT/src/jit/p.lua
  35. 10 7
      Source/ThirdParty/LuaJIT/src/jit/v.lua
  36. 45 0
      Source/ThirdParty/LuaJIT/src/jit/zone.lua
  37. 2 2
      Source/ThirdParty/LuaJIT/src/lib_aux.c
  38. 34 53
      Source/ThirdParty/LuaJIT/src/lib_base.c
  39. 121 15
      Source/ThirdParty/LuaJIT/src/lib_bit.c
  40. 2 2
      Source/ThirdParty/LuaJIT/src/lib_debug.c
  41. 35 16
      Source/ThirdParty/LuaJIT/src/lib_ffi.c
  42. 1 1
      Source/ThirdParty/LuaJIT/src/lib_init.c
  43. 26 24
      Source/ThirdParty/LuaJIT/src/lib_io.c
  44. 130 26
      Source/ThirdParty/LuaJIT/src/lib_jit.c
  45. 5 8
      Source/ThirdParty/LuaJIT/src/lib_math.c
  46. 32 20
      Source/ThirdParty/LuaJIT/src/lib_os.c
  47. 12 4
      Source/ThirdParty/LuaJIT/src/lib_package.c
  48. 131 319
      Source/ThirdParty/LuaJIT/src/lib_string.c
  49. 88 81
      Source/ThirdParty/LuaJIT/src/lib_table.c
  50. 15 0
      Source/ThirdParty/LuaJIT/src/lj.supp
  51. 17 9
      Source/ThirdParty/LuaJIT/src/lj_alloc.c
  52. 75 62
      Source/ThirdParty/LuaJIT/src/lj_api.c
  53. 144 50
      Source/ThirdParty/LuaJIT/src/lj_arch.h
  54. 432 68
      Source/ThirdParty/LuaJIT/src/lj_asm.c
  55. 1 1
      Source/ThirdParty/LuaJIT/src/lj_asm.h
  56. 150 294
      Source/ThirdParty/LuaJIT/src/lj_asm_arm.h
  57. 111 255
      Source/ThirdParty/LuaJIT/src/lj_asm_mips.h
  58. 113 265
      Source/ThirdParty/LuaJIT/src/lj_asm_ppc.h
  59. 219 379
      Source/ThirdParty/LuaJIT/src/lj_asm_x86.h
  60. 1 1
      Source/ThirdParty/LuaJIT/src/lj_bc.c
  61. 5 1
      Source/ThirdParty/LuaJIT/src/lj_bc.h
  62. 5 3
      Source/ThirdParty/LuaJIT/src/lj_bcdump.h
  63. 63 82
      Source/ThirdParty/LuaJIT/src/lj_bcread.c
  64. 98 133
      Source/ThirdParty/LuaJIT/src/lj_bcwrite.c
  65. 234 0
      Source/ThirdParty/LuaJIT/src/lj_buf.c
  66. 105 0
      Source/ThirdParty/LuaJIT/src/lj_buf.h
  67. 82 4
      Source/ThirdParty/LuaJIT/src/lj_carith.c
  68. 11 1
      Source/ThirdParty/LuaJIT/src/lj_carith.h
  69. 158 51
      Source/ThirdParty/LuaJIT/src/lj_ccall.c
  70. 20 13
      Source/ThirdParty/LuaJIT/src/lj_ccall.h
  71. 105 28
      Source/ThirdParty/LuaJIT/src/lj_ccallback.c
  72. 1 1
      Source/ThirdParty/LuaJIT/src/lj_ccallback.h
  73. 2 1
      Source/ThirdParty/LuaJIT/src/lj_cconv.c
  74. 1 1
      Source/ThirdParty/LuaJIT/src/lj_cconv.h
  75. 35 23
      Source/ThirdParty/LuaJIT/src/lj_cdata.c
  76. 6 3
      Source/ThirdParty/LuaJIT/src/lj_cdata.h
  77. 20 11
      Source/ThirdParty/LuaJIT/src/lj_clib.c
  78. 1 1
      Source/ThirdParty/LuaJIT/src/lj_clib.h
  79. 24 34
      Source/ThirdParty/LuaJIT/src/lj_cparse.c
  80. 1 1
      Source/ThirdParty/LuaJIT/src/lj_cparse.h
  81. 249 68
      Source/ThirdParty/LuaJIT/src/lj_crecord.c
  82. 8 1
      Source/ThirdParty/LuaJIT/src/lj_crecord.h
  83. 11 8
      Source/ThirdParty/LuaJIT/src/lj_ctype.c
  84. 2 2
      Source/ThirdParty/LuaJIT/src/lj_ctype.h
  85. 142 46
      Source/ThirdParty/LuaJIT/src/lj_debug.c
  86. 7 3
      Source/ThirdParty/LuaJIT/src/lj_debug.h
  87. 24 8
      Source/ThirdParty/LuaJIT/src/lj_def.h
  88. 73 10
      Source/ThirdParty/LuaJIT/src/lj_dispatch.c
  89. 32 9
      Source/ThirdParty/LuaJIT/src/lj_dispatch.h
  90. 9 9
      Source/ThirdParty/LuaJIT/src/lj_emit_arm.h
  91. 9 9
      Source/ThirdParty/LuaJIT/src/lj_emit_mips.h
  92. 9 9
      Source/ThirdParty/LuaJIT/src/lj_emit_ppc.h
  93. 11 15
      Source/ThirdParty/LuaJIT/src/lj_emit_x86.h
  94. 124 77
      Source/ThirdParty/LuaJIT/src/lj_err.c
  95. 1 1
      Source/ThirdParty/LuaJIT/src/lj_err.h
  96. 2 5
      Source/ThirdParty/LuaJIT/src/lj_errmsg.h
  97. 1 1
      Source/ThirdParty/LuaJIT/src/lj_ff.h
  98. 449 120
      Source/ThirdParty/LuaJIT/src/lj_ffrecord.c
  99. 1 1
      Source/ThirdParty/LuaJIT/src/lj_ffrecord.h
  100. 110 23
      Source/ThirdParty/LuaJIT/src/lj_frame.h

+ 1 - 1
Source/ThirdParty/LuaJIT/COPYRIGHT

@@ -1,7 +1,7 @@
 ===============================================================================
 ===============================================================================
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 
 
-Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 of this software and associated documentation files (the "Software"), to deal

+ 3 - 3
Source/ThirdParty/LuaJIT/README

@@ -1,11 +1,11 @@
-README for LuaJIT 2.0.3
------------------------
+README for LuaJIT 2.1.0-beta1
+-----------------------------
 
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
 
 Project Homepage: http://luajit.org/
 Project Homepage: http://luajit.org/
 
 
-LuaJIT is Copyright (C) 2005-2014 Mike Pall.
+LuaJIT is Copyright (C) 2005-2015 Mike Pall.
 LuaJIT is free software, released under the MIT license.
 LuaJIT is free software, released under the MIT license.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 
 

+ 1 - 1
Source/ThirdParty/LuaJIT/dynasm/dasm_arm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM ARM encoding engine.
 ** DynASM ARM encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 

+ 9 - 6
Source/ThirdParty/LuaJIT/dynasm/dasm_arm.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM ARM module.
 -- DynASM ARM module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -9,9 +9,9 @@
 local _info = {
 local _info = {
   arch =	"arm",
   arch =	"arm",
   description =	"DynASM ARM module",
   description =	"DynASM ARM module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -923,19 +923,22 @@ local function parse_template(params, template, nparams, pos)
 end
 end
 
 
 map_op[".template__"] = function(params, template, nparams)
 map_op[".template__"] = function(params, template, nparams)
-  if not params then return sub(template, 9) end
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
 
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- Limit number of section buffer positions used by a single dasm_put().
   -- A single opcode needs a maximum of 3 positions.
   -- A single opcode needs a maximum of 3 positions.
   if secpos+3 > maxsecpos then wflush() end
   if secpos+3 > maxsecpos then wflush() end
   local pos = wpos()
   local pos = wpos()
-  local apos, spos = #actargs, secpos
+  local lpos, apos, spos = #actlist, #actargs, secpos
 
 
   local ok, err
   local ok, err
   for t in gmatch(template, "[^|]+") do
   for t in gmatch(template, "[^|]+") do
     ok, err = pcall(parse_template, params, t, nparams, pos)
     ok, err = pcall(parse_template, params, t, nparams, pos)
     if ok then return end
     if ok then return end
     secpos = spos
     secpos = spos
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
     actargs[apos+1] = nil
     actargs[apos+1] = nil
     actargs[apos+2] = nil
     actargs[apos+2] = nil
     actargs[apos+3] = nil
     actargs[apos+3] = nil

+ 518 - 0
Source/ThirdParty/LuaJIT/dynasm/dasm_arm64.h

@@ -0,0 +1,518 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"arm64"	/* Architecture name of this encoder. */
+
+#ifndef DASM_EXTERN	/* The includer may provide its own definition. */
+#define DASM_EXTERN(a,b,c,d)	0	/* Default: ignores its arguments, yields 0. */
+#endif
+
+/* Action definitions.
+** An action list word stores its action number in the upper 16 bits; words
+** whose upper half is >= DASM__MAX are plain instruction words. The order of
+** the enumerators is significant: the passes test "action >= DASM_ALIGN" and
+** "action >= DASM_REL_PC" to decide whether an action occupies a section
+** buffer position or consumes a dasm_put() argument. The list has to stay
+** in sync with action_names in the DynASM ARM64 Lua module.
+*/
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM__MAX  /* Number of actions; not an action itself. */
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call.
+** Has to stay in sync with maxsecpos in the DynASM ARM64 Lua module.
+*/
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in.
+** The code itself lives in the top byte; the CK/CKPL macros or the action
+** list offset into the low bits.
+*/
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index).
+** DASM_POS2PTR indexes the section's biased rbuf with the full position, so
+** the section bits need not be masked off first.
+*/
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type: pointer to read-only unsigned int action/instruction words. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure.
+** Holds the growable int buffer into which dasm_put() records the action
+** list start offset and the arguments of every put for this section, plus
+** the running byte offset of the code generated so far.
+*/
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state.
+** dasm_init() obtains it with room for maxsection entries in the trailing
+** sections[] array (see DASM_PSZ).
+*/
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;		/* Size of lglabels in bytes. */
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;		/* Size of pclabels in bytes. */
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections.
+** The structure already embeds one dasm_Section, so only ms-1 more are added.
+** The argument is parenthesized so that expression arguments expand safely.
+*/
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state.
+** Obtains the state block, sized for maxsection sections, through the
+** includer-supplied DASM_M_GROW macro and publishes it in Dst_REF. The label
+** arrays, the globals pointer and the buffer bookkeeping of every section
+** are reset. Status, action list and active section are not touched here;
+** dasm_setup() assigns them.
+*/
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state.
+** Releases every allocated section buffer, both label arrays and finally
+** the state block itself through DASM_M_FREE. The globals array handed to
+** dasm_setupglobal() is not freed here.
+*/
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup().
+** gl is stored with a bias of -10 so that the lglabels index of a global
+** label (10 and up) can index it directly. lglabels is grown to hold 10
+** local slots plus maxgl global slots.
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too.
+** Requests room for maxpc int entries and zero-fills only the bytes beyond
+** the previous size, i.e. the newly added labels start out unused.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder.
+** Starts a new encoding run on the given action list: clears the status,
+** makes section 0 the active section, zeroes all local/global labels (and
+** the PC labels, if any) and rewinds the position and byte offset of every
+** section. lglabels is cleared unconditionally, hence dasm_setupglobal()
+** has to be called first.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+/* Pass 1 sanity checks; they expand to nothing without DASM_CHECKS.
+** CK(x, st): if x is false, store DASM_S_<st> or'ed with the action list
+** offset of the word just fetched in D->status and return from the caller.
+** CKPL(kind, st): likewise with DASM_S_RANGE_<st> if pl points outside the
+** lg/pc label array. Both rely on D, p (and pl) in the calling scope.
+*/
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+/* Encode n as a 12 bit immediate with optional shift.
+** Values below 4096 are returned unchanged. Values with only bits 12..23
+** set are returned shifted right by 12 with bit 12 (0x1000) set as the
+** shift flag. Anything else yields -1.
+*/
+static int dasm_imm12(unsigned int n)
+{
+  if ((n >> 12) == 0)
+    return n;
+  else if ((n & 0xff000fff) == 0)
+    return (n >> 12) | 0x1000;
+  else
+    return -1;
+}
+/* Return the index of the highest set bit of x, or -1 if x is zero.
+** dasm_imm13() only passes values with at most one bit set, for which this
+** is simply the index of that bit.
+*/
+static int dasm_ffs(unsigned long long x)
+{
+  int n = -1;
+  while (x) { x >>= 1; n++; }
+  return n;
+}
+/* Compute the 13 bit immediate field for the 64 bit pattern hi:lo.
+** The pattern is first normalized so that bit 0 is clear (by inverting it).
+** Then a is the lowest set bit, b the first clear bit above that run of
+** ones and c the start of the next run (0 if there is none); the distance
+** between the two run starts is the period w. The pattern is accepted only
+** if replicating the run (b-a) with period w reproduces it exactly and w is
+** 2, 4, 8, 16, 32 or 64. Returns -1 otherwise, including for all-zeros and
+** all-ones. dasm_encode() or's the result into the instruction at bit 10.
+** NOTE(review): presumably this is the A64 logical immediate (N:immr:imms)
+** field -- confirm against the architecture manual.
+*/
+static int dasm_imm13(int lo, int hi)
+{
+  int inv = 0, w = 64, s = 0xfff, xa, xb;
+  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+  unsigned long long m = 1ULL, a, b, c;
+  if (n & 1) { n = ~n; inv = 1; }  /* Normalize: bit 0 clear. */
+  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
+  xa = dasm_ffs(a); xb = dasm_ffs(b);
+  if (c) {  /* A second run exists: the pattern repeats with period w. */
+    w = dasm_ffs(c) - xa;
+    if (w == 32) m = 0x0000000100000001UL;
+    else if (w == 16) m = 0x0001000100010001UL;
+    else if (w == 8) m = 0x0101010101010101UL;
+    else if (w == 4) m = 0x1111111111111111UL;
+    else if (w == 2) m = 0x5555555555555555UL;
+    else return -1;
+    s = (-2*w & 0x3f) - 1;
+  } else if (!a) {  /* No bit set at all. */
+    return -1;
+  } else if (xb == -1) {  /* The run extends up to bit 63. */
+    xb = 64;
+  }
+  if ((b-a) * m != n) return -1;  /* Not a pure replication of the run. */
+  if (inv) {
+    return ((w - xb) << 6) | (s+w+xa-xb);
+  } else {
+    return ((w - xa) << 6) | (s+xb-xa);
+  }
+  return -1;  /* Not reached: both branches above return. */
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
+** start is the offset of the first word to process in the action list. The
+** variadic arguments are ints, consumed in order by every action numbered
+** DASM_REL_PC or higher (DASM_IMM13X consumes two). Processing ends at
+** DASM_STOP or DASM_SECTION; the latter also switches the active section.
+** The start offset and all arguments are appended to the section buffer so
+** that the later passes can replay the put. With DASM_CHECKS a failed check
+** stores an error in D->status and returns immediately.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {  /* Make room for at least one more put. */
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;  /* Remember where this put starts in the action list. */
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);
+    if (action >= DASM__MAX) {  /* Plain instruction word. */
+      ofs += 4;
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;  /* Next word is emitted verbatim. */
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if ((ins & 0x8000))
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMM6:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM12:
+	CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13W:
+	CK(dasm_imm13(n, n) != -1, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM13X: {
+	int m = va_arg(ap, int);  /* Second argument: upper 32 bits. */
+	CK(dasm_imm13(n, m) != -1, RANGE_I);
+	b[pos++] = n;
+	b[pos++] = m;
+	break;
+	}
+      case DASM_IMML: {
+#ifdef DASM_CHECKS
+	int scale = (p[-2] >> 30);
+	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+	   (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+	}
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets.
+** Replays the recorded puts of all sections in order, turns the worst-case
+** alignment estimates of pass 1 into real padding and adds the resulting
+** offset correction to every stored label offset. The total code size is
+** stored in D->codesize and *szp. Returns DASM_S_OK; with DASM_CHECKS it
+** returns a pending error status or DASM_S_UNDEF_PC for a PC label that is
+** referenced but never defined (and *szp is 0 in that case).
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+	case DASM_IMML: pos++; break;
+	case DASM_IMM13X: pos += 2; break;  /* Stored as two ints in pass 1. */
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+/* Pass 3 variant of CK: instead of storing the error in D->status it makes
+** the caller return DASM_S_<st> or'ed with the action list offset of the
+** word just fetched. It expands to nothing without DASM_CHECKS.
+*/
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections.
+** Replays the recorded puts of every section in order and writes the final
+** 32 bit instruction words to buffer. Branch offsets and immediates are
+** or'ed into the word emitted just before the action (cp[-1]); addresses of
+** global labels are stored in D->globals. buffer has to provide the size
+** reported by dasm_link(). Returns DASM_S_OK, DASM_S_PHASE if the emitted
+** size differs from D->codesize, or (with DASM_CHECKS) a DASM_S_* error
+** or'ed with the action list offset.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	/* Actions from DASM_ALIGN upwards own one stored buffer slot. */
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+	  goto patchrel;
+	case DASM_ALIGN:
+	  /* Pad with A64 NOPs (HINT #0, 0xd503201f). */
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	  /* fallthrough */
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+	patchrel:
+	  if (!(ins & 0xf800)) {  /* B, BL */
+	    CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+	    cp[-1] |= ((n >> 2) & 0x03ffffff);
+	  } else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
+	    CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0);
+	  } else if ((ins & 0x3000) == 0x2000) {  /* ADR */
+	    CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+	  } else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
+	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  /* Only global labels (20 and up) are exported. */
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	case DASM_IMM6:
+	  cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+	  break;
+	case DASM_IMM12:
+	  cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+	  break;
+	case DASM_IMM13W:
+	  cp[-1] |= (dasm_imm13(n, n) << 10);
+	  break;
+	case DASM_IMM13X:
+	  cp[-1] |= (dasm_imm13(n, *b++) << 10);  /* Second slot: upper half. */
+	  break;
+	case DASM_IMML: {
+	  /* Scaled unsigned 12 bit offset if it fits, else 9 bit offset. */
+	  int scale = (p[-2] >> 30);
+	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+	  break;
+	  }
+	default: *cp++ = ins; break;  /* Plain instruction word. */
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset.
+** Returns the offset stored at the label's buffer position if PC label pc
+** has been defined (dasm_link() adjusts the pass 1 estimate stored there),
+** -1 if the label is referenced but not defined, and -2 if it is unused or
+** pc is out of range.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps.
+** If no error is pending, the local labels 1..9 are inspected: the first
+** one that is still referenced but undefined sets DASM_S_UNDEF_LG (and stops
+** the scan), all others seen so far are reset to 0. If secmatch >= 0 the
+** active section additionally has to be sections[secmatch], otherwise
+** DASM_S_MATCH_SEC is set. Returns the resulting D->status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+

+ 1166 - 0
Source/ThirdParty/LuaJIT/dynasm/dasm_arm64.lua

@@ -0,0 +1,1166 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+  arch =	"arm",
+  description =	"DynASM ARM64 module",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+-- Writes one line per action to `out`: name, code in hex, code in decimal.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for idx = 1, #action_names do
+    local aname = action_names[idx]
+    local code = map_action[aname]
+    out:write(format("  %-10s %02X  %d\n", aname, code, code))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+-- `out` is the output stream, `name` the C identifier of the array.
+-- An empty action list is written as a one-element array holding STOP.
+local function writeactions(out, name)
+  local nn = #actlist
+  local last = actlist[nn]
+  -- Empty list: substitute a single STOP word. (The word must be the one
+  -- that is actually passed to tohex() below; parking it in actlist[0]
+  -- left actlist[1] nil and made tohex() raise an error.)
+  if nn == 0 then nn = 1; last = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(last), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+-- Asserts that n is an integer in the range 0 .. 0xffffffff.
+local function wputxw(n)
+  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
+  assert(in_range, "word out of range")
+  insert(actlist, n)
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+-- The action word is (action code * 0x10000) + val. If `a` is given it
+-- is queued as an argument for the next dasm_put(). The section buffer
+-- position advances by `num` (default 1) when `a` or `num` is given.
+local function waction(action, val, a, num)
+  local code = map_action[action]
+  assert(code, "bad action name `"..action.."'")
+  wputxw(code * 0x10000 + (val or 0))
+  if a then
+    actargs[#actargs+1] = a
+    secpos = secpos + (num or 1)
+  elseif num then
+    secpos = secpos + num
+  end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+-- Emits a dasm_put() line with the queued arguments unless nothing was
+-- added since the last flush. A STOP action is appended first unless
+-- `term` is set.
+local function wflush(term)
+  if #actlist ~= actargs[1] then
+    if not term then waction("STOP") end -- Terminate action list.
+    local arglist = concat(actargs, ", ")
+    wline(format("dasm_put(Dst, %s);", arglist), true)
+    -- Actionlist offset is 1st arg to next dasm_put().
+    actargs = { #actlist }
+    -- The actionlist offset occupies a buffer position, too.
+    secpos = 1
+  end
+end
+
+-- Put escaped word.
+-- Words up to 0x000fffff are preceded by an ESC action; presumably so the
+-- encoder does not mistake them for an action word.
+local function wputw(n)
+  local needs_esc = n <= 0x000fffff
+  if needs_esc then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word.
+-- Appends an empty-string placeholder to the action list and returns its
+-- index, to be filled in later by wputpos().
+local function wpos()
+  local slot = #actlist + 1
+  actlist[slot] = ""
+  return slot
+end
+
+-- Store word to reserved position.
+-- Words up to 0x000fffff are stored escaped: the reserved slot receives
+-- an ESC action and the word itself is inserted right after it.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  if n > 0x000fffff then
+    actlist[pos] = n
+  else
+    insert(actlist, pos+1, n)
+    actlist[pos] = map_action.ESC * 0x10000
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+-- Numbering starts at 20; lower numbers are used for local labels.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(tab, label)
+  if not match(label, "^[%a_][%w_]*$") then werror("bad global label") end
+  local num = next_global
+  if num > 2047 then werror("too many global labels") end
+  next_global = num + 1
+  tab[label] = num -- Cache, so later lookups bypass __index.
+  return num
+end})
+
+-- Dump global labels.
+-- Lists the names of all global labels in label number order on `out`.
+-- The `lvl` argument is not used.
+local function dumpglobals(out, lvl)
+  local names = {}
+  for label, num in pairs(map_global) do names[num] = label end
+  out:write("Global labels:\n")
+  local idx = 20
+  while idx < next_global do
+    out:write(format("  %s\n", names[idx]))
+    idx = idx + 1
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+-- Emits one enumerator (prefix..name) per global label in label number
+-- order, followed by a final prefix.."_MAX" enumerator.
+local function writeglobals(out, prefix)
+  local names = {}
+  for label, num in pairs(map_global) do names[num] = label end
+  out:write("enum {\n")
+  local idx = 20
+  while idx < next_global do
+    out:write("  ", prefix, names[idx], ",\n")
+    idx = idx + 1
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+-- Emits a C array called `name` with the global label names in label
+-- number order, terminated by a null pointer.
+local function writeglobalnames(out, name)
+  local labels = {}
+  for label, num in pairs(map_global) do labels[num] = label end
+  out:write("static const char *const ", name, "[] = {\n")
+  local idx = 20
+  while idx < next_global do
+    out:write("  \"", labels[idx], "\",\n")
+    idx = idx + 1
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+-- map_extern_ holds the reverse mapping (number -> name).
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(tab, extname)
+  -- No restrictions on the name for now.
+  local num = next_extern
+  if num > 2047 then werror("too many extern labels") end
+  next_extern = num + 1
+  tab[extname] = num
+  map_extern_[num] = extname
+  return num
+end})
+
+-- Dump extern labels.
+-- Lists the extern label names in assignment order on `out`.
+-- The `lvl` argument is not used.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  local idx = 0
+  while idx < next_extern do
+    out:write(format("  %s\n", map_extern_[idx]))
+    idx = idx + 1
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+-- Emits a C array called `name` with the extern label names in
+-- assignment order, terminated by a null pointer.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  local idx = 0
+  while idx < next_extern do
+    out:write("  \"", map_extern_[idx], "\",\n")
+    idx = idx + 1
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+-- Maps an internal register name back to its external name; any other
+-- string is returned unchanged.
+function _M.revdef(s)
+  local ext = map_reg_rev[s]
+  if ext then return ext end
+  return s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+
+local map_extend = {
+  uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+  sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+}
+
+local map_cond = {
+  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+  hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+local parse_reg_type
+
+-- Parse a register operand.
+-- Accepts a register name ([xwqdshb] followed by a number, optionally
+-- with the internal "@" prefix) or a `.type` name with an optional
+-- ":reg" override. The register class letter is recorded in
+-- parse_reg_type, or checked against an already recorded class.
+-- Returns the register number (0..31) and the map_type entry (nil if the
+-- operand was not a type name). Raises an error via werror() otherwise.
+local function parse_reg(expr)
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    local is_gpr = rt == "w" or rt == "x"
+    -- Register 31 is valid for non-x/w classes, but for x/w only in the
+    -- "@"-prefixed internal form (see the xzr/wzr defines). The pattern
+    -- admits numbers up to 39, so the r == 31 test must guard both
+    -- alternatives; otherwise e.g. d35 would slip through and overflow
+    -- the 5 bit register field.
+    if r <= 30 or (r == 31 and (ok31 ~= "" or not is_gpr)) then
+      if not parse_reg_type then
+        parse_reg_type = rt
+      elseif parse_reg_type ~= rt then
+        werror("register size mismatch")
+      end
+      return r, tp
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a base register for an address operand.
+-- Returns the register number shifted into bits 5..9 plus the type entry
+-- from parse_reg(). "sp" yields 0x3e0 (register field 31). Any other
+-- operand must be an x register; parse_reg_type is reset afterwards.
+local function parse_reg_base(expr)
+  if expr ~= "sp" then
+    local regno, tp = parse_reg(expr)
+    if parse_reg_type ~= "x" then werror("bad register type") end
+    parse_reg_type = false
+    return shl(regno, 5), tp
+  end
+  return 0x3e0
+end
+
+-- Empty environment for evaluating immediate expressions.
+local parse_ctx = {}
+
+-- Compile a string as a chunk that runs with parse_ctx as its
+-- environment. Uses loadstring()+setfenv() where setfenv exists and the
+-- four-argument load() otherwise.
+local loadenv
+if setfenv then
+  loadenv = function(s)
+    local chunk = loadstring(s, "")
+    if chunk then setfenv(chunk, parse_ctx) end
+    return chunk
+  end
+else
+  loadenv = function(s)
+    return load(s, "", nil, parse_ctx)
+  end
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+-- Returns the number for a plain numeric string, else the result of
+-- evaluating the string as an expression in the empty parse environment.
+-- Returns nil if the string neither converts nor evaluates.
+local function parse_number(n)
+  local value = tonumber(n)
+  if value then return value end
+  local chunk = loadenv("return "..n)
+  if not chunk then return nil end
+  local ok, result = pcall(chunk)
+  if not ok then return nil end
+  return result
+end
+
+-- Parse a "#imm" operand for a generic immediate field.
+--   bits   width of the field
+--   shift  bit position of the field in the instruction word
+--   scale  number of low bits that must be zero and are dropped
+--   signed whether negative values are allowed
+-- A compile-time constant is range-checked and returned already shifted
+-- into place. Anything else is deferred to an IMM action whose value
+-- packs signed/scale/bits/shift; 0 is returned in that case.
+local function parse_imm(imm, bits, shift, scale, signed)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m = sar(n, scale)
+    -- The scaled-down value must reproduce n exactly.
+    if shl(m, scale) == n then
+      if signed then
+	local s = sar(m, bits-1)
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+-- Parse a "#imm" operand for the 12 bit immediate field at bit 10.
+-- A constant below 4096 is encoded directly. A constant with only bits
+-- 12..23 set is encoded with 0x00400000 added (field value is n >> 12).
+-- Non-constant expressions are deferred to an IMM12 action (returns 0).
+local function parse_imm12(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if not n then
+    waction("IMM12", 0, imm)
+    return 0
+  end
+  if shr(n, 12) == 0 then return shl(n, 10) end
+  if band(n, 0xff000fff) == 0 then return shr(n, 2) + 0x00400000 end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Parse a "#imm" operand for the 13 bit pattern field of the logical
+-- immediate templates (presumably the A64 N:immr:imms bitmask encoding).
+-- Integer constants in 0..0xffffffff are encoded here; all other operands
+-- are deferred to an IMM13X action (x registers, the value is passed as
+-- two 32 bit halves) or an IMM13W action (otherwise), returning 0.
+local function parse_imm13(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  local r64 = parse_reg_type == "x"
+  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
+    -- Normalize to a value with bit 0 clear, remembering the inversion.
+    local inv = false
+    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
+    -- Build a string of the 32 bits, least significant bit first.
+    local t = {}
+    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
+    local b = table.concat(t)
+    -- Extend to 64 characters: for x registers with 32 fill bits ("1" if
+    -- the value was inverted, else "0"), otherwise by repeating the string.
+    b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
+    -- Expect a run of zeros, a run of ones and optionally more of each.
+    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+    if p0 then
+      -- w is the element width; it must be a power of two and the whole
+      -- 64 character string must be a repetition of the first w characters.
+      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
+      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
+	local s = band(-2*w, 0x3f) - 1
+	if w == 64 then s = s + 0x1000 end
+	if inv then
+	  return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+	else
+	  return shl(w-#p0, 16) + shl(s+#p1, 10)
+	end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif r64 then
+    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
+    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
+    return 0
+  else
+    waction("IMM13W", 0, imm)
+    return 0
+  end
+end
+
+-- Parse a "#imm" operand in the range 0..63 whose low 5 bits go to bit 19
+-- and whose bit 5 goes to bit 31 of the instruction word (used by the
+-- tbz/tbnz templates). Non-constants are deferred to an IMM6 action.
+local function parse_imm6(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if not n then
+    waction("IMM6", 0, imm)
+    return 0
+  end
+  if n >= 0 and n <= 63 then
+    local high = 0
+    if n >= 32 then high = 0x80000000 end
+    return shl(band(n, 0x1f), 19) + high
+  end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Encode the immediate offset of a single load/store.
+-- `imm` is the expression text without the leading "#", `scale` the log2
+-- access size. Prefers the scaled, unsigned 12 bit form and falls back
+-- to the unscaled, signed 9 bit form. Non-constants are deferred to an
+-- IMML action (returns 0).
+local function parse_imm_load(imm, scale)
+  local n = parse_number(imm)
+  if not n then
+    waction("IMML", 0, imm)
+    return 0
+  end
+  local scaled = sar(n, scale)
+  if shl(scaled, scale) == n and scaled >= 0 and scaled < 0x1000 then
+    -- Scaled, unsigned 12 bit offset.
+    return shl(scaled, 10) + 0x01000000
+  end
+  if n >= -256 and n < 256 then
+    -- Unscaled, signed 9 bit offset.
+    return shl(band(n, 511), 12)
+  end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Parse a "#imm" floating point constant for the 8 bit FP immediate form.
+-- The value must have a 4 bit fraction and a 3 bit exponent; the result
+-- has the sign at bit 20, the exponent at bit 17 and the fraction at
+-- bit 13. Non-constant operands are not implemented.
+local function parse_fpimm(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if not n then
+    werror("NYI fpimm action")
+  else
+    local mant, exp = math.frexp(n)
+    local exp3 = band(exp-2, 7)
+    local sign = 0
+    if mant < 0 then mant = -mant; sign = 0x00100000 end
+    local frac = mant*32-16
+    -- The exponent must survive sign extension from 3 bits.
+    local exp_fits = sar(shl(exp3, 29), 29)+2 == exp
+    if frac % 1 == 0 and frac >= 0 and frac <= 15 and exp_fits then
+      return sign + shl(exp3, 17) + shl(frac, 13)
+    end
+    werror("out of range immediate `"..imm.."'")
+  end
+end
+
+-- Parse a shift operand such as "lsl #3".
+-- Returns the 6 bit shift amount at bit 10 plus the shift kind at bit 22.
+local function parse_shift(expr)
+  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
+  local code = map_shift[kind]
+  if not code then werror("expected shift operand") end
+  local field = parse_imm(amount, 6, 10, 0, false)
+  return field + shl(code, 22)
+end
+
+-- Parse an "lsl #n" operand where n is a multiple of 16.
+-- Allowed amounts are 0/16/32/48 for x registers and 0/16 otherwise.
+-- Returns n shifted left by 17 (i.e. n/16 placed at bit 21).
+local function parse_lslx16(expr)
+  local digits = match(expr, "^lsl%s*#(%d+)$")
+  local amount = tonumber(digits)
+  if not amount then werror("expected shift operand") end
+  local forbidden
+  if parse_reg_type == "x" then
+    forbidden = 0xffffffcf
+  else
+    forbidden = 0xffffffef
+  end
+  if band(amount, forbidden) ~= 0 then
+    werror("bad shift amount")
+  end
+  return shl(amount, 17)
+end
+
+-- Parse an extend operand such as "uxtw" or "sxtx #2".
+-- "lsl" is treated as uxtx for x registers and as uxtw otherwise.
+-- Returns the extend kind at bit 13 plus an optional 3 bit amount at
+-- bit 10.
+local function parse_extend(expr)
+  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
+  local code
+  if kind == "lsl" then
+    code = parse_reg_type == "x" and 3 or 2
+  else
+    code = map_extend[kind]
+  end
+  if not code then werror("expected extend operand") end
+  local field = 0
+  if amount ~= "" then field = parse_imm(amount, 3, 10, 0, false) end
+  return field + shl(code, 13)
+end
+
+-- Parse a condition code name.
+-- The code is XORed with `inv` (1 selects the inverted condition) and
+-- returned shifted to bit 12.
+local function parse_cond(expr, inv)
+  local code = map_cond[expr]
+  if not code then werror("expected condition operand") end
+  local effective = bit.bxor(code, inv)
+  return shl(effective, 12)
+end
+
+-- Parse the address operand(s) of a single load/store.
+-- params[n] (and optionally params[n+1]) hold the address, `op` is the
+-- opcode assembled so far. Returns op with base register, offset or
+-- index register and addressing mode bits added.
+local function parse_load(params, nparams, n, op)
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then
+    -- Not a bracketed address: only a type name followed by a tail is
+    -- accepted. The offset is then supplied through an IMML action.
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then
+	  waction("IMML", 0, format(tp.ctypefmt, tailr))
+	  return op + base
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  -- The top two opcode bits give the access size as a power of two.
+  local scale = shr(op, 30)
+  if p2 then
+    -- [base], #imm: signed 9 bit offset, mode bits 0x400.
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+  elseif wb == "!" then
+    -- [base, #imm]!: signed 9 bit offset, mode bits 0xc00.
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if not p1a then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+  else
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + parse_reg_base(p1a)
+    if p2a ~= "" then
+      local imm = match(p2a, "^,%s*#(.*)$")
+      if imm then
+	op = op + parse_imm_load(imm, scale)
+      else
+	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+	  werror("bad index register type")
+	end
+	if p3b == "" then
+	  if parse_reg_type ~= "x" then werror("bad index register type") end
+	  op = op + 0x6000
+	else
+	  if p3s == "" or p3s == "#0" then
+	  elseif p3s == "#"..scale then
+	    op = op + 0x1000
+	  else
+	    werror("bad scale")
+	  end
+	  if parse_reg_type == "x" then
+	    if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+	    elseif p3b == "sxtx" then op = op + 0xe000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  else
+	    if p3b == "uxtw" then op = op + 0x4000
+	    elseif p3b == "sxtw" then op = op + 0xc000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  end
+	end
+      end
+    else
+      -- Plain [base]: scaled unsigned offset form with a zero offset.
+      if wb == "!" then werror("bad use of '!'") end
+      op = op + 0x01000000
+    end
+  end
+  return op
+end
+
+-- Parse the address operand(s) of a load/store pair.
+-- params[n] (and optionally params[n+1]) hold the address, `op` is the
+-- opcode assembled so far. Returns op with base register, addressing
+-- mode bits and the scaled, signed 7 bit offset added.
+-- Forms: [base], [base, #imm], [base, #imm]!, [base], #imm, or a type
+-- name followed by a tail (offset supplied through an IMM action).
+local function parse_load_pair(params, nparams, n, op)
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  -- Offset scale = log2 of the element size. For the integer forms
+  -- (bit 26 clear) only bit 31 selects 8 byte elements; for the FP forms
+  -- (bit 26 set) bit 30 does. Testing bits 31:30 as a whole would give
+  -- ldpsw (0x68...), which transfers 4 byte words, a scale of 3.
+  local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+  if not p1 then
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+        local base, tp = parse_reg_base(reg)
+        if tp then
+          -- Signed 7 bit field at bit 15, resolved by the encoder.
+          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
+          return op + base + 0x01000000
+        end
+      end
+    end
+    werror("expected address operand")
+  end
+  if p2 then
+    -- [base], #imm
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + 0x00800000
+  else
+    -- [base, #imm]! or [base, #imm] or [base] (offset defaults to #0).
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
+    op = op + (wb == "!" and 0x01800000 or 0x01000000)
+  end
+  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
+end
+
+-- Parse a label operand or definition.
+-- Returns a mode ("PC", "LG" or "EXT"), a label number and, for PC
+-- labels, the label expression text. `def` selects definition syntax
+-- ([1-9]) instead of reference syntax ([<>][1-9] or "extern name").
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  if prefix == "=>" then
+    -- =>label (pc label reference)
+    return "PC", 0, sub(label, 3)
+  elseif prefix == "->" then
+    -- ->name (global label reference)
+    return "LG", map_global[sub(label, 3)]
+  elseif def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+-- Classify a PC-relative opcode and return the mode bits that are added
+-- to the value of the REL_* action emitted for it.
+local function branch_type(op)
+  if band(op, 0x7c000000) == 0x14000000 then
+    return 0 -- B, BL
+  end
+  if shr(op, 24) == 0x54 then return 0x800 end -- B.cond
+  if band(op, 0x7e000000) == 0x34000000 then return 0x800 end -- CBZ, CBNZ
+  if band(op, 0x3b000000) == 0x18000000 then return 0x800 end -- LDR* literal
+  if band(op, 0x7e000000) == 0x36000000 then return 0x1000 end -- TBZ, TBNZ
+  local adr_bits = band(op, 0x9f000000)
+  if adr_bits == 0x10000000 then return 0x2000 end -- ADR
+  -- band() with one argument normalizes the constant to the same signed
+  -- 32 bit range as the result above.
+  if adr_bits == band(0x90000000) then return 0x3000 end -- ADRP
+  assert(false, "unknown branch type")
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+-- Create an opcode handler that rewrites its operands with `f` and then
+-- assembles them with the template(s) registered under `opname`.
+-- Called without params, the handler returns "-> " plus the opcode name
+-- with its last two characters (the operand count suffix) removed.
+local function op_alias(opname, f)
+  local function handler(params, nparams)
+    if not params then
+      return "-> "..opname:sub(1, -3)
+    end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+  return handler
+end
+
+-- Operand rewrite for the sbfx/bfxil/ubfx aliases: replaces the 4th
+-- operand by "#(op3)+(op4)-1". The first character of both operands
+-- (expected to be "#") is dropped.
+local function alias_bfx(p)
+  local third = p[3]:sub(2)
+  local fourth = p[4]:sub(2)
+  p[4] = "#("..third..")+("..fourth..")-1"
+end
+
+-- Operand rewrite for the sbfiz/bfi/ubfiz aliases: the 3rd operand
+-- becomes (size - op3) modulo size and the 4th becomes op4 - 1, with
+-- size = 32 for w registers and 64 otherwise.
+-- The expressions may be evaluated in Lua or passed on as C expressions.
+-- They are written as (size-(x))%size rather than -(x)%size, because a
+-- negative left operand of % gives a non-positive result in C. Both
+-- forms give the same value when evaluated in Lua.
+local function alias_bfiz(p)
+  parse_reg(p[1])
+  local size = parse_reg_type == "w" and "32" or "64"
+  p[3] = "#("..size.."-("..p[3]:sub(2).."))%"..size
+  p[4] = "#("..p[4]:sub(2)..")-1"
+end
+
+-- "lsl Rd, Rn, #sh" as an alias of ubfm: the 3rd operand becomes
+-- (size - sh) modulo size and a 4th operand size-1-sh is added, with
+-- size = 32 for w registers and 64 otherwise.
+-- (size-(sh))%size is used instead of -(sh)%size so the expression stays
+-- non-negative when it is passed on as a C expression; both forms give
+-- the same value when evaluated in Lua.
+local alias_lslimm = op_alias("ubfm_4", function(p)
+  parse_reg(p[1])
+  local sh = p[3]:sub(2)
+  if parse_reg_type == "w" then
+    p[3] = "#(32-("..sh.."))%32"
+    p[4] = "#31-("..sh..")"
+  else
+    p[3] = "#(64-("..sh.."))%64"
+    p[4] = "#63-("..sh..")"
+  end
+end)
+
+-- Template strings for ARM instructions.
+map_op = {
+  -- Basic data processing instructions.
+  add_3  = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
+  add_4  = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
+  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
+  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
+  cmn_2  = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
+  cmn_3  = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
+
+  sub_3  = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
+  sub_4  = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
+  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
+  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
+  cmp_2  = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
+  cmp_3  = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
+
+  neg_2  = "4b0003e0DMg",
+  neg_3  = "4b0003e0DMSg",
+  negs_2 = "6b0003e0DMg",
+  negs_3 = "6b0003e0DMSg",
+
+  adc_3  = "1a000000DNMg",
+  adcs_3 = "3a000000DNMg",
+  sbc_3  = "5a000000DNMg",
+  sbcs_3 = "7a000000DNMg",
+  ngc_2  = "5a0003e0DMg",
+  ngcs_2 = "7a0003e0DMg",
+
+  and_3  = "0a000000DNMg|12000000pDNig",
+  and_4  = "0a000000DNMSg",
+  orr_3  = "2a000000DNMg|32000000pDNig",
+  orr_4  = "2a000000DNMSg",
+  eor_3  = "4a000000DNMg|52000000pDNig",
+  eor_4  = "4a000000DNMSg",
+  ands_3 = "6a000000DNMg|72000000DNig",
+  ands_4 = "6a000000DNMSg",
+  tst_2  = "6a00001fNMg|7200001fNig",
+  tst_3  = "6a00001fNMSg",
+
+  bic_3  = "0a200000DNMg",
+  bic_4  = "0a200000DNMSg",
+  orn_3  = "2a200000DNMg",
+  orn_4  = "2a200000DNMSg",
+  eon_3  = "4a200000DNMg",
+  eon_4  = "4a200000DNMSg",
+  bics_3 = "6a200000DNMg",
+  bics_4 = "6a200000DNMSg",
+
+  movn_2 = "12800000DWg",
+  movn_3 = "12800000DWRg",
+  movz_2 = "52800000DWg",
+  movz_3 = "52800000DWRg",
+  movk_2 = "72800000DWg",
+  movk_3 = "72800000DWRg",
+
+  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+  mov_2  = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+  mov_3  = "2a0003e0DMSg",
+  mvn_2  = "2a2003e0DMg",
+  mvn_3  = "2a2003e0DMSg",
+
+  adr_2  = "10000000DBx",
+  adrp_2 = "90000000DBx",
+
+  csel_4  = "1a800000DNMCg",
+  csinc_4 = "1a800400DNMCg",
+  csinv_4 = "5a800000DNMCg",
+  csneg_4 = "5a800400DNMCg",
+  cset_2  = "1a9f07e0Dcg",
+  csetm_2 = "5a9f03e0Dcg",
+  cinc_3  = "1a800400DNmcg",
+  cinv_3  = "5a800000DNmcg",
+  cneg_3  = "5a800400DNmcg",
+
+  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+
+  madd_4 = "1b000000DNMAg",
+  msub_4 = "1b008000DNMAg",
+  mul_3  = "1b007c00DNMg",
+  mneg_3 = "1b00fc00DNMg",
+
+  smaddl_4 = "9b200000DxNMwAx",
+  smsubl_4 = "9b208000DxNMwAx",
+  smull_3  = "9b207c00DxNMw",
+  smnegl_3 = "9b20fc00DxNMw",
+  smulh_3  = "9b407c00DNMx",
+  umaddl_4 = "9ba00000DxNMwAx",
+  umsubl_4 = "9ba08000DxNMwAx",
+  umull_3  = "9ba07c00DxNMw",
+  umnegl_3 = "9ba0fc00DxNMw",
+  umulh_3  = "9bc07c00DNMx",
+
+  udiv_3 = "1ac00800DNMg",
+  sdiv_3 = "1ac00c00DNMg",
+
+  -- Bit operations.
+  sbfm_4 = "13000000DN12w|93400000DN12x",
+  bfm_4  = "33000000DN12w|b3400000DN12x",
+  ubfm_4 = "53000000DN12w|d3400000DN12x",
+  extr_4 = "13800000DNM2w|93c00000DNM2x",
+
+  sxtb_2 = "13001c00DNw|93401c00DNx",
+  sxth_2 = "13003c00DNw|93403c00DNx",
+  sxtw_2 = "93407c00DxNw",
+  uxtb_2 = "53001c00DNw",
+  uxth_2 = "53003c00DNw",
+
+  sbfx_4  = op_alias("sbfm_4", alias_bfx),
+  bfxil_4 = op_alias("bfm_4", alias_bfx),
+  ubfx_4  = op_alias("ubfm_4", alias_bfx),
+  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+  bfi_4   = op_alias("bfm_4", alias_bfiz),
+  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+
+  lsl_3  = function(params, nparams)
+    if params and params[3]:byte() == 35 then
+      return alias_lslimm(params, nparams)
+    else
+      return op_template(params, "1ac02000DNMg", nparams)
+    end
+  end,
+  lsr_3  = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+  asr_3  = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+  ror_3  = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+
+  clz_2   = "5ac01000DNg",
+  cls_2   = "5ac01400DNg",
+  rbit_2  = "5ac00000DNg",
+  rev_2   = "5ac00800DNw|dac00c00DNx",
+  rev16_2 = "5ac00400DNg",
+  rev32_2 = "dac00800DNx",
+
+  -- Loads and stores.
+  ["strb_*"]  = "38000000DwL",
+  ["ldrb_*"]  = "38400000DwL",
+  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+  ["strh_*"]  = "78000000DwL",
+  ["ldrh_*"]  = "78400000DwL",
+  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+  -- NOTE: ldur etc. are handled by ldr et al.
+
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["ldpsw_*"] = "68400000DAxP",
+
+  -- Branches.
+  b_1    = "14000000B",
+  bl_1   = "94000000B",
+  blr_1  = "d63f0000Nx",
+  br_1   = "d61f0000Nx",
+  ret_0  = "d65f03c0",
+  ret_1  = "d65f0000Nx",
+  -- b.cond is added below.
+  cbz_2  = "34000000DBg",
+  cbnz_2 = "35000000DBg",
+  tbz_3  = "36000000DTBw|36000000DTBx",
+  tbnz_3 = "37000000DTBw|37000000DTBx",
+
+  -- Miscellaneous instructions.
+  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+  -- TODO: sys, sysl, ic, dc, at, tlbi
+  -- TODO: hint, yield, wfe, wfi, sev, sevl
+  -- TODO: clrex, dsb, dmb, isb
+  nop_0  = "d503201f",
+  brk_0  = "d4200000",
+  brk_1  = "d4200000W",
+
+  -- Floating point instructions.
+  fmov_2  = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+  fabs_2  = "1e20c000DNf",
+  fneg_2  = "1e214000DNf",
+  fsqrt_2 = "1e21c000DNf",
+
+  fcvt_2  = "1e22c000DdNs|1e624000DsNd",
+
+  -- TODO: half-precision and fixed-point conversions.
+  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+
+  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+
+  frintn_2 = "1e244000DNf",
+  frintp_2 = "1e24c000DNf",
+  frintm_2 = "1e254000DNf",
+  frintz_2 = "1e25c000DNf",
+  frinta_2 = "1e264000DNf",
+  frintx_2 = "1e274000DNf",
+  frinti_2 = "1e27c000DNf",
+
+  fadd_3   = "1e202800DNMf",
+  fsub_3   = "1e203800DNMf",
+  fmul_3   = "1e200800DNMf",
+  fnmul_3  = "1e208800DNMf",
+  fdiv_3   = "1e201800DNMf",
+
+  fmadd_4  = "1f000000DNMAf",
+  fmsub_4  = "1f008000DNMAf",
+  fnmadd_4 = "1f200000DNMAf",
+  fnmsub_4 = "1f208000DNMAf",
+
+  fmax_3   = "1e204800DNMf",
+  fmaxnm_3 = "1e206800DNMf",
+  fmin_3   = "1e205800DNMf",
+  fminnm_3 = "1e207800DNMf",
+
+  fcmp_2   = "1e202000NMf|1e202008NZf",
+  fcmpe_2  = "1e202010NMf|1e202018NZf",
+
+  fccmp_4  = "1e200400NMVCf",
+  fccmpe_4 = "1e200410NMVCf",
+
+  fcsel_4  = "1e200c00DNMCf",
+
+  -- TODO: crc32*, aes*, sha*, pmull
+  -- TODO: SIMD instructions.
+}
+
+-- Add the conditional branches: one "b<cond>_1" template per entry of
+-- map_cond (so the hs/lo aliases are included), with the condition code
+-- added into the base opcode 0x54000000.
+for cond,c in pairs(map_cond) do
+  map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+-- A template is 8 hex digits (the base opcode) followed by one character
+-- per parsing step. Each step either consumes the next operand and adds
+-- its encoding to the opcode, or checks/resets the register class that
+-- parse_reg() recorded in parse_reg_type. The finished word is stored at
+-- the reserved action list position `pos`. Errors are raised through
+-- werror() by the individual parsers.
+local function parse_template(params, template, nparams, pos)
+  local op = tonumber(sub(template, 1, 8), 16)
+  local n = 1
+  local rtt = {}
+
+  parse_reg_type = false
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    local q = params[n]
+    -- Register operands: D at bit 0, N at bit 5, M at bit 16, A at bit 10.
+    if p == "D" then
+      op = op + parse_reg(q); n = n + 1
+    elseif p == "N" then
+      op = op + shl(parse_reg(q), 5); n = n + 1
+    elseif p == "M" then
+      op = op + shl(parse_reg(q), 16); n = n + 1
+    elseif p == "A" then
+      op = op + shl(parse_reg(q), 10); n = n + 1
+    -- m: reuse the previous operand as the register at bit 16.
+    elseif p == "m" then
+      op = op + shl(parse_reg(params[n-1]), 16)
+
+    -- p: allow "sp" for the next register by rewriting it to "@x31".
+    elseif p == "p" then
+      if q == "sp" then params[n] = "@x31" end
+    -- g: x/w registers parsed so far; x adds bit 31.
+    elseif p == "g" then
+      if parse_reg_type == "x" then
+	op = op + 0x80000000
+      elseif parse_reg_type ~= "w" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    -- f: d/s registers parsed so far; d adds bit 22.
+    elseif p == "f" then
+      if parse_reg_type == "d" then
+	op = op + 0x00400000
+      elseif parse_reg_type ~= "s" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    -- x/w/d/s: registers parsed so far must be of exactly this class.
+    elseif p == "x" or p == "w" or p == "d" or p == "s" then
+      if parse_reg_type ~= p then
+	werror("register size mismatch")
+      end
+      parse_reg_type = false
+
+    -- L/P: address operands of single and paired loads/stores.
+    elseif p == "L" then
+      op = parse_load(params, nparams, n, op)
+    elseif p == "P" then
+      op = parse_load_pair(params, nparams, n, op)
+
+    -- B: label operand, emitted as a REL_* action tagged by branch_type().
+    elseif p == "B" then
+      local mode, v, s = parse_label(q, false); n = n + 1
+      local m = branch_type(op)
+      waction("REL_"..mode, v+m, s, 1)
+
+    -- Immediate operands of various widths and positions.
+    elseif p == "I" then
+      op = op + parse_imm12(q); n = n + 1
+    elseif p == "i" then
+      op = op + parse_imm13(q); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+    elseif p == "T" then
+      op = op + parse_imm6(q); n = n + 1
+    elseif p == "1" then
+      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+    elseif p == "2" then
+      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+    elseif p == "5" then
+      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+    elseif p == "V" then
+      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+    elseif p == "F" then
+      op = op + parse_fpimm(q); n = n + 1
+    elseif p == "Z" then
+      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
+      n = n + 1
+
+    -- Shift, extend, 16-multiple lsl and condition operands (c = inverted).
+    elseif p == "S" then
+      op = op + parse_shift(q); n = n + 1
+    elseif p == "X" then
+      op = op + parse_extend(q); n = n + 1
+    elseif p == "R" then
+      op = op + parse_lslx16(q); n = n + 1
+    elseif p == "C" then
+      op = op + parse_cond(q, 0); n = n + 1
+    elseif p == "c" then
+      op = op + parse_cond(q, 1); n = n + 1
+
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+-- Assemble one instruction from a "|"-separated list of templates.
+-- The alternatives are tried in order with parse_template(); the first
+-- one that parses wins. After a failed attempt the action list, the
+-- argument list and the buffer position are rolled back. If all attempts
+-- fail, the error of the last one is re-raised.
+-- Called without params, returns the template with all runs of 8 hex
+-- digits removed.
+function op_template(params, template, nparams)
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions.
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+  local list_mark, args_mark, secpos_mark = #actlist, #actargs, secpos
+
+  local last_err
+  for alt in gmatch(template, "[^|]+") do
+    local ok, err = pcall(parse_template, params, alt, nparams, pos)
+    if ok then return end
+    last_err = err
+    secpos = secpos_mark
+    for k = 1, 3 do
+      actlist[list_mark+k] = nil
+      actargs[args_mark+k] = nil
+    end
+  end
+  error(last_err, 0)
+end
+
+map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+-- Called without params, returns the operand description.
+map_op[".actionlist_1"] = function(params)
+  if params then
+    -- No syntax check. You get to keep the pieces.
+    local cvar = params[1]
+    wline(function(out) writeactions(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+-- Called without params, returns the operand description.
+map_op[".globals_1"] = function(params)
+  if params then
+    -- No syntax check. You get to keep the pieces.
+    local enum_prefix = params[1]
+    wline(function(out) writeglobals(out, enum_prefix) end)
+  else
+    return "prefix"
+  end
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+-- Called without params, returns the operand description.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    -- No syntax check. You get to keep the pieces.
+    local cvar = params[1]
+    wline(function(out) writeglobalnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+-- Called without params, returns the operand description.
+map_op[".externnames_1"] = function(params)
+  if params then
+    -- No syntax check. You get to keep the pieces.
+    local cvar = params[1]
+    wline(function(out) writeexternnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+	waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+	return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+

+ 1 - 1
Source/ThirdParty/LuaJIT/dynasm/dasm_mips.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM MIPS encoding engine.
 ** DynASM MIPS encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 

+ 4 - 4
Source/ThirdParty/LuaJIT/dynasm/dasm_mips.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM MIPS module.
 -- DynASM MIPS module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -9,9 +9,9 @@
 local _info = {
 local _info = {
   arch =	"mips",
   arch =	"mips",
   description =	"DynASM MIPS module",
   description =	"DynASM MIPS module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2012-01-23",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }

+ 11 - 4
Source/ThirdParty/LuaJIT/dynasm/dasm_ppc.h

@@ -1,6 +1,6 @@
 /*
 /*
-** DynASM PPC encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** DynASM PPC/PPC64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
   DASM__MAX
   DASM__MAX
 };
 };
 
 
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
 #endif
 #endif
 	b[pos++] = n;
 	b[pos++] = n;
 	break;
 	break;
+      case DASM_IMMSH:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
       }
       }
     }
     }
   }
   }
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMSH: pos++; break;
 	}
 	}
       }
       }
       stop: (void)0;
       stop: (void)0;
@@ -366,6 +370,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM:
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;
 	  break;
+	case DASM_IMMSH:
+	  cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
+	  break;
 	default: *cp++ = ins; break;
 	default: *cp++ = ins; break;
 	}
 	}
       }
       }

+ 687 - 17
Source/ThirdParty/LuaJIT/dynasm/dasm_ppc.lua

@@ -1,17 +1,19 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Module information:
 -- Module information:
 local _info = {
 local _info = {
   arch =	"ppc",
   arch =	"ppc",
   description =	"DynASM PPC module",
   description =	"DynASM PPC module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMSH"
 }
 }
 
 
 -- Maximum number of section buffer positions for dasm_put().
 -- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local map_op, op_template
+
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
 -- Template strings for PPC instructions.
 -- Template strings for PPC instructions.
-local map_op = {
+map_op = {
   tdi_3 =	"08000000ARI",
   tdi_3 =	"08000000ARI",
   twi_3 =	"0c000000ARI",
   twi_3 =	"0c000000ARI",
   mulli_3 =	"1c000000RRI",
   mulli_3 =	"1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
   std_2 =	"f8000000RD",
   std_2 =	"f8000000RD",
   stdu_2 =	"f8000001RD",
   stdu_2 =	"f8000001RD",
 
 
+  subi_3 =	op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
+  subis_3 =	op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
+  subic_3 =	op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
+  ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
+
+  rotlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  rotrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
+  end),
+  rotlw_3 =	op_alias("rlwnm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  slwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[4] = "0"
+  end),
+  srwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
+  end),
+  clrlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "0"; p[5] = "31"
+  end),
+  clrrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
+  end),
+
+  -- Primary opcode 4:
+  mulhhwu_3 =		"10000010RRR.",
+  machhwu_3 =		"10000018RRR.",
+  mulhhw_3 =		"10000050RRR.",
+  nmachhw_3 =		"1000005cRRR.",
+  machhwsu_3 =		"10000098RRR.",
+  machhws_3 =		"100000d8RRR.",
+  nmachhws_3 =		"100000dcRRR.",
+  mulchwu_3 =		"10000110RRR.",
+  macchwu_3 =		"10000118RRR.",
+  mulchw_3 =		"10000150RRR.",
+  macchw_3 =		"10000158RRR.",
+  nmacchw_3 =		"1000015cRRR.",
+  macchwsu_3 =		"10000198RRR.",
+  macchws_3 =		"100001d8RRR.",
+  nmacchws_3 =		"100001dcRRR.",
+  mullhw_3 =		"10000350RRR.",
+  maclhw_3 =		"10000358RRR.",
+  nmaclhw_3 =		"1000035cRRR.",
+  maclhwsu_3 =		"10000398RRR.",
+  maclhws_3 =		"100003d8RRR.",
+  nmaclhws_3 =		"100003dcRRR.",
+  machhwuo_3 =		"10000418RRR.",
+  nmachhwo_3 =		"1000045cRRR.",
+  machhwsuo_3 =		"10000498RRR.",
+  machhwso_3 =		"100004d8RRR.",
+  nmachhwso_3 =		"100004dcRRR.",
+  macchwuo_3 =		"10000518RRR.",
+  macchwo_3 =		"10000558RRR.",
+  nmacchwo_3 =		"1000055cRRR.",
+  macchwsuo_3 =		"10000598RRR.",
+  macchwso_3 =		"100005d8RRR.",
+  nmacchwso_3 =		"100005dcRRR.",
+  maclhwo_3 =		"10000758RRR.",
+  nmaclhwo_3 =		"1000075cRRR.",
+  maclhwsuo_3 =		"10000798RRR.",
+  maclhwso_3 =		"100007d8RRR.",
+  nmaclhwso_3 =		"100007dcRRR.",
+
+  vaddubm_3 =		"10000000VVV",
+  vmaxub_3 =		"10000002VVV",
+  vrlb_3 =		"10000004VVV",
+  vcmpequb_3 =		"10000006VVV",
+  vmuloub_3 =		"10000008VVV",
+  vaddfp_3 =		"1000000aVVV",
+  vmrghb_3 =		"1000000cVVV",
+  vpkuhum_3 =		"1000000eVVV",
+  vmhaddshs_4 =		"10000020VVVV",
+  vmhraddshs_4 =	"10000021VVVV",
+  vmladduhm_4 =		"10000022VVVV",
+  vmsumubm_4 =		"10000024VVVV",
+  vmsummbm_4 =		"10000025VVVV",
+  vmsumuhm_4 =		"10000026VVVV",
+  vmsumuhs_4 =		"10000027VVVV",
+  vmsumshm_4 =		"10000028VVVV",
+  vmsumshs_4 =		"10000029VVVV",
+  vsel_4 =		"1000002aVVVV",
+  vperm_4 =		"1000002bVVVV",
+  vsldoi_4 =		"1000002cVVVP",
+  vpermxor_4 =		"1000002dVVVV",
+  vmaddfp_4 =		"1000002eVVVV~",
+  vnmsubfp_4 =		"1000002fVVVV~",
+  vaddeuqm_4 =		"1000003cVVVV",
+  vaddecuq_4 =		"1000003dVVVV",
+  vsubeuqm_4 =		"1000003eVVVV",
+  vsubecuq_4 =		"1000003fVVVV",
+  vadduhm_3 =		"10000040VVV",
+  vmaxuh_3 =		"10000042VVV",
+  vrlh_3 =		"10000044VVV",
+  vcmpequh_3 =		"10000046VVV",
+  vmulouh_3 =		"10000048VVV",
+  vsubfp_3 =		"1000004aVVV",
+  vmrghh_3 =		"1000004cVVV",
+  vpkuwum_3 =		"1000004eVVV",
+  vadduwm_3 =		"10000080VVV",
+  vmaxuw_3 =		"10000082VVV",
+  vrlw_3 =		"10000084VVV",
+  vcmpequw_3 =		"10000086VVV",
+  vmulouw_3 =		"10000088VVV",
+  vmuluwm_3 =		"10000089VVV",
+  vmrghw_3 =		"1000008cVVV",
+  vpkuhus_3 =		"1000008eVVV",
+  vaddudm_3 =		"100000c0VVV",
+  vmaxud_3 =		"100000c2VVV",
+  vrld_3 =		"100000c4VVV",
+  vcmpeqfp_3 =		"100000c6VVV",
+  vcmpequd_3 =		"100000c7VVV",
+  vpkuwus_3 =		"100000ceVVV",
+  vadduqm_3 =		"10000100VVV",
+  vmaxsb_3 =		"10000102VVV",
+  vslb_3 =		"10000104VVV",
+  vmulosb_3 =		"10000108VVV",
+  vrefp_2 =		"1000010aV-V",
+  vmrglb_3 =		"1000010cVVV",
+  vpkshus_3 =		"1000010eVVV",
+  vaddcuq_3 =		"10000140VVV",
+  vmaxsh_3 =		"10000142VVV",
+  vslh_3 =		"10000144VVV",
+  vmulosh_3 =		"10000148VVV",
+  vrsqrtefp_2 =		"1000014aV-V",
+  vmrglh_3 =		"1000014cVVV",
+  vpkswus_3 =		"1000014eVVV",
+  vaddcuw_3 =		"10000180VVV",
+  vmaxsw_3 =		"10000182VVV",
+  vslw_3 =		"10000184VVV",
+  vmulosw_3 =		"10000188VVV",
+  vexptefp_2 =		"1000018aV-V",
+  vmrglw_3 =		"1000018cVVV",
+  vpkshss_3 =		"1000018eVVV",
+  vmaxsd_3 =		"100001c2VVV",
+  vsl_3 =		"100001c4VVV",
+  vcmpgefp_3 =		"100001c6VVV",
+  vlogefp_2 =		"100001caV-V",
+  vpkswss_3 =		"100001ceVVV",
+  vadduhs_3 =		"10000240VVV",
+  vminuh_3 =		"10000242VVV",
+  vsrh_3 =		"10000244VVV",
+  vcmpgtuh_3 =		"10000246VVV",
+  vmuleuh_3 =		"10000248VVV",
+  vrfiz_2 =		"1000024aV-V",
+  vsplth_3 =		"1000024cVV3",
+  vupkhsh_2 =		"1000024eV-V",
+  vminuw_3 =		"10000282VVV",
+  vminud_3 =		"100002c2VVV",
+  vcmpgtud_3 =		"100002c7VVV",
+  vrfim_2 =		"100002caV-V",
+  vcmpgtsb_3 =		"10000306VVV",
+  vcfux_3 =		"1000030aVVA~",
+  vaddshs_3 =		"10000340VVV",
+  vminsh_3 =		"10000342VVV",
+  vsrah_3 =		"10000344VVV",
+  vcmpgtsh_3 =		"10000346VVV",
+  vmulesh_3 =		"10000348VVV",
+  vcfsx_3 =		"1000034aVVA~",
+  vspltish_2 =		"1000034cVS",
+  vupkhpx_2 =		"1000034eV-V",
+  vaddsws_3 =		"10000380VVV",
+  vminsw_3 =		"10000382VVV",
+  vsraw_3 =		"10000384VVV",
+  vcmpgtsw_3 =		"10000386VVV",
+  vmulesw_3 =		"10000388VVV",
+  vctuxs_3 =		"1000038aVVA~",
+  vspltisw_2 =		"1000038cVS",
+  vminsd_3 =		"100003c2VVV",
+  vsrad_3 =		"100003c4VVV",
+  vcmpbfp_3 =		"100003c6VVV",
+  vcmpgtsd_3 =		"100003c7VVV",
+  vctsxs_3 =		"100003caVVA~",
+  vupklpx_2 =		"100003ceV-V",
+  vsububm_3 =		"10000400VVV",
+  ["bcdadd._4"] =	"10000401VVVy.",
+  vavgub_3 =		"10000402VVV",
+  vand_3 =		"10000404VVV",
+  ["vcmpequb._3"] =	"10000406VVV",
+  vmaxfp_3 =		"1000040aVVV",
+  vsubuhm_3 =		"10000440VVV",
+  ["bcdsub._4"] =	"10000441VVVy.",
+  vavguh_3 =		"10000442VVV",
+  vandc_3 =		"10000444VVV",
+  ["vcmpequh._3"] =	"10000446VVV",
+  vminfp_3 =		"1000044aVVV",
+  vpkudum_3 =		"1000044eVVV",
+  vsubuwm_3 =		"10000480VVV",
+  vavguw_3 =		"10000482VVV",
+  vor_3 =		"10000484VVV",
+  ["vcmpequw._3"] =	"10000486VVV",
+  vpmsumw_3 =		"10000488VVV",
+  ["vcmpeqfp._3"] =	"100004c6VVV",
+  ["vcmpequd._3"] =	"100004c7VVV",
+  vpkudus_3 =		"100004ceVVV",
+  vavgsb_3 =		"10000502VVV",
+  vavgsh_3 =		"10000542VVV",
+  vorc_3 =		"10000544VVV",
+  vbpermq_3 =		"1000054cVVV",
+  vpksdus_3 =		"1000054eVVV",
+  vavgsw_3 =		"10000582VVV",
+  vsld_3 =		"100005c4VVV",
+  ["vcmpgefp._3"] =	"100005c6VVV",
+  vpksdss_3 =		"100005ceVVV",
+  vsububs_3 =		"10000600VVV",
+  mfvscr_1 =		"10000604V--",
+  vsum4ubs_3 =		"10000608VVV",
+  vsubuhs_3 =		"10000640VVV",
+  mtvscr_1 =		"10000644--V",
+  ["vcmpgtuh._3"] =	"10000646VVV",
+  vsum4shs_3 =		"10000648VVV",
+  vupkhsw_2 =		"1000064eV-V",
+  vsubuws_3 =		"10000680VVV",
+  vshasigmaw_4 =	"10000682VVYp",
+  veqv_3 =		"10000684VVV",
+  vsum2sws_3 =		"10000688VVV",
+  vmrgow_3 =		"1000068cVVV",
+  vshasigmad_4 =	"100006c2VVYp",
+  vsrd_3 =		"100006c4VVV",
+  ["vcmpgtud._3"] =	"100006c7VVV",
+  vupklsw_2 =		"100006ceV-V",
+  vupkslw_2 =		"100006ceV-V",
+  vsubsbs_3 =		"10000700VVV",
+  vclzb_2 =		"10000702V-V",
+  vpopcntb_2 =		"10000703V-V",
+  ["vcmpgtsb._3"] =	"10000706VVV",
+  vsum4sbs_3 =		"10000708VVV",
+  vsubshs_3 =		"10000740VVV",
+  vclzh_2 =		"10000742V-V",
+  vpopcnth_2 =		"10000743V-V",
+  ["vcmpgtsh._3"] =	"10000746VVV",
+  vsubsws_3 =		"10000780VVV",
+  vclzw_2 =		"10000782V-V",
+  vpopcntw_2 =		"10000783V-V",
+  ["vcmpgtsw._3"] =	"10000786VVV",
+  vsumsws_3 =		"10000788VVV",
+  vmrgew_3 =		"1000078cVVV",
+  vclzd_2 =		"100007c2V-V",
+  vpopcntd_2 =		"100007c3V-V",
+  ["vcmpbfp._3"] =	"100007c6VVV",
+  ["vcmpgtsd._3"] =	"100007c7VVV",
+
   -- Primary opcode 19:
   -- Primary opcode 19:
   mcrf_2 =	"4c000000XX",
   mcrf_2 =	"4c000000XX",
   isync_0 =	"4c00012c",
   isync_0 =	"4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
   bclrl_2 =	"4c000021AA",
   bclrl_2 =	"4c000021AA",
   bcctr_2 =	"4c000420AA",
   bcctr_2 =	"4c000420AA",
   bcctrl_2 =	"4c000421AA",
   bcctrl_2 =	"4c000421AA",
+  bctar_2 =	"4c000460AA",
+  bctarl_2 =	"4c000461AA",
   blr_0 =	"4e800020",
   blr_0 =	"4e800020",
   blrl_0 =	"4e800021",
   blrl_0 =	"4e800021",
   bctr_0 =	"4e800420",
   bctr_0 =	"4e800420",
@@ -327,6 +585,7 @@ local map_op = {
   cmpd_3 =	"7c200000XRR",
   cmpd_3 =	"7c200000XRR",
   cmpd_2 =	"7c200000-RR",
   cmpd_2 =	"7c200000-RR",
   tw_3 =	"7c000008ARR",
   tw_3 =	"7c000008ARR",
+  lvsl_3 =	"7c00000cVRR",
   subfc_3 =	"7c000010RRR.",
   subfc_3 =	"7c000010RRR.",
   subc_3 =	"7c000010RRR~.",
   subc_3 =	"7c000010RRR~.",
   mulhdu_3 =	"7c000012RRR.",
   mulhdu_3 =	"7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
   cmplw_2 =	"7c000040-RR",
   cmplw_2 =	"7c000040-RR",
   cmpld_3 =	"7c200040XRR",
   cmpld_3 =	"7c200040XRR",
   cmpld_2 =	"7c200040-RR",
   cmpld_2 =	"7c200040-RR",
+  lvsr_3 =	"7c00004cVRR",
   subf_3 =	"7c000050RRR.",
   subf_3 =	"7c000050RRR.",
   sub_3 =	"7c000050RRR~.",
   sub_3 =	"7c000050RRR~.",
+  lbarx_3 =	"7c000068RR0R",
   ldux_3 =	"7c00006aRR0R",
   ldux_3 =	"7c00006aRR0R",
   dcbst_2 =	"7c00006c-RR",
   dcbst_2 =	"7c00006c-RR",
   lwzux_3 =	"7c00006eRR0R",
   lwzux_3 =	"7c00006eRR0R",
   cntlzd_2 =	"7c000074RR~",
   cntlzd_2 =	"7c000074RR~",
   andc_3 =	"7c000078RR~R.",
   andc_3 =	"7c000078RR~R.",
   td_3 =	"7c000088ARR",
   td_3 =	"7c000088ARR",
+  lvewx_3 =	"7c00008eVRR",
   mulhd_3 =	"7c000092RRR.",
   mulhd_3 =	"7c000092RRR.",
+  addg6s_3 =	"7c000094RRR",
   mulhw_3 =	"7c000096RRR.",
   mulhw_3 =	"7c000096RRR.",
+  dlmzb_3 =	"7c00009cRR~R.",
   ldarx_3 =	"7c0000a8RR0R",
   ldarx_3 =	"7c0000a8RR0R",
   dcbf_2 =	"7c0000ac-RR",
   dcbf_2 =	"7c0000ac-RR",
   lbzx_3 =	"7c0000aeRR0R",
   lbzx_3 =	"7c0000aeRR0R",
+  lvx_3 =	"7c0000ceVRR",
   neg_2 =	"7c0000d0RR.",
   neg_2 =	"7c0000d0RR.",
+  lharx_3 =	"7c0000e8RR0R",
   lbzux_3 =	"7c0000eeRR0R",
   lbzux_3 =	"7c0000eeRR0R",
   popcntb_2 =	"7c0000f4RR~",
   popcntb_2 =	"7c0000f4RR~",
   not_2 =	"7c0000f8RR~%.",
   not_2 =	"7c0000f8RR~%.",
   nor_3 =	"7c0000f8RR~R.",
   nor_3 =	"7c0000f8RR~R.",
+  stvebx_3 =	"7c00010eVRR",
   subfe_3 =	"7c000110RRR.",
   subfe_3 =	"7c000110RRR.",
   sube_3 =	"7c000110RRR~.",
   sube_3 =	"7c000110RRR~.",
   adde_3 =	"7c000114RRR.",
   adde_3 =	"7c000114RRR.",
   stdx_3 =	"7c00012aRR0R",
   stdx_3 =	"7c00012aRR0R",
-  stwcx_3 =	"7c00012cRR0R.",
+  ["stwcx._3"] =	"7c00012dRR0R.",
   stwx_3 =	"7c00012eRR0R",
   stwx_3 =	"7c00012eRR0R",
   prtyw_2 =	"7c000134RR~",
   prtyw_2 =	"7c000134RR~",
+  stvehx_3 =	"7c00014eVRR",
   stdux_3 =	"7c00016aRR0R",
   stdux_3 =	"7c00016aRR0R",
+  ["stqcx._3"] =	"7c00016dR:R0R.",
   stwux_3 =	"7c00016eRR0R",
   stwux_3 =	"7c00016eRR0R",
   prtyd_2 =	"7c000174RR~",
   prtyd_2 =	"7c000174RR~",
+  stvewx_3 =	"7c00018eVRR",
   subfze_2 =	"7c000190RR.",
   subfze_2 =	"7c000190RR.",
   addze_2 =	"7c000194RR.",
   addze_2 =	"7c000194RR.",
-  stdcx_3 =	"7c0001acRR0R.",
+  ["stdcx._3"] =	"7c0001adRR0R.",
   stbx_3 =	"7c0001aeRR0R",
   stbx_3 =	"7c0001aeRR0R",
+  stvx_3 =	"7c0001ceVRR",
   subfme_2 =	"7c0001d0RR.",
   subfme_2 =	"7c0001d0RR.",
   mulld_3 =	"7c0001d2RRR.",
   mulld_3 =	"7c0001d2RRR.",
   addme_2 =	"7c0001d4RR.",
   addme_2 =	"7c0001d4RR.",
   mullw_3 =	"7c0001d6RRR.",
   mullw_3 =	"7c0001d6RRR.",
   dcbtst_2 =	"7c0001ec-RR",
   dcbtst_2 =	"7c0001ec-RR",
   stbux_3 =	"7c0001eeRR0R",
   stbux_3 =	"7c0001eeRR0R",
+  bpermd_3 =	"7c0001f8RR~R",
+  lvepxl_3 =	"7c00020eVRR",
   add_3 =	"7c000214RRR.",
   add_3 =	"7c000214RRR.",
+  lqarx_3 =	"7c000228R:R0R",
   dcbt_2 =	"7c00022c-RR",
   dcbt_2 =	"7c00022c-RR",
   lhzx_3 =	"7c00022eRR0R",
   lhzx_3 =	"7c00022eRR0R",
+  cdtbcd_2 =	"7c000234RR~",
   eqv_3 =	"7c000238RR~R.",
   eqv_3 =	"7c000238RR~R.",
+  lvepx_3 =	"7c00024eVRR",
   eciwx_3 =	"7c00026cRR0R",
   eciwx_3 =	"7c00026cRR0R",
   lhzux_3 =	"7c00026eRR0R",
   lhzux_3 =	"7c00026eRR0R",
+  cbcdtd_2 =	"7c000274RR~",
   xor_3 =	"7c000278RR~R.",
   xor_3 =	"7c000278RR~R.",
   mfspefscr_1 =	"7c0082a6R",
   mfspefscr_1 =	"7c0082a6R",
   mfxer_1 =	"7c0102a6R",
   mfxer_1 =	"7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
   lhax_3 =	"7c0002aeRR0R",
   lhax_3 =	"7c0002aeRR0R",
   mftb_1 =	"7c0c42e6R",
   mftb_1 =	"7c0c42e6R",
   mftbu_1 =	"7c0d42e6R",
   mftbu_1 =	"7c0d42e6R",
+  lvxl_3 =	"7c0002ceVRR",
   lwaux_3 =	"7c0002eaRR0R",
   lwaux_3 =	"7c0002eaRR0R",
   lhaux_3 =	"7c0002eeRR0R",
   lhaux_3 =	"7c0002eeRR0R",
+  popcntw_2 =	"7c0002f4RR~",
+  divdeu_3 =	"7c000312RRR.",
+  divweu_3 =	"7c000316RRR.",
   sthx_3 =	"7c00032eRR0R",
   sthx_3 =	"7c00032eRR0R",
   orc_3 =	"7c000338RR~R.",
   orc_3 =	"7c000338RR~R.",
   ecowx_3 =	"7c00036cRR0R",
   ecowx_3 =	"7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
   mtctr_1 =	"7c0903a6R",
   mtctr_1 =	"7c0903a6R",
   dcbi_2 =	"7c0003ac-RR",
   dcbi_2 =	"7c0003ac-RR",
   nand_3 =	"7c0003b8RR~R.",
   nand_3 =	"7c0003b8RR~R.",
+  dsn_2 =	"7c0003c6-RR",
+  stvxl_3 =	"7c0003ceVRR",
   divd_3 =	"7c0003d2RRR.",
   divd_3 =	"7c0003d2RRR.",
   divw_3 =	"7c0003d6RRR.",
   divw_3 =	"7c0003d6RRR.",
+  popcntd_2 =	"7c0003f4RR~",
   cmpb_3 =	"7c0003f8RR~R.",
   cmpb_3 =	"7c0003f8RR~R.",
   mcrxr_1 =	"7c000400X",
   mcrxr_1 =	"7c000400X",
+  lbdx_3 =	"7c000406RRR",
   subfco_3 =	"7c000410RRR.",
   subfco_3 =	"7c000410RRR.",
   subco_3 =	"7c000410RRR~.",
   subco_3 =	"7c000410RRR~.",
   addco_3 =	"7c000414RRR.",
   addco_3 =	"7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
   lfsx_3 =	"7c00042eFR0R",
   lfsx_3 =	"7c00042eFR0R",
   srw_3 =	"7c000430RR~R.",
   srw_3 =	"7c000430RR~R.",
   srd_3 =	"7c000436RR~R.",
   srd_3 =	"7c000436RR~R.",
+  lhdx_3 =	"7c000446RRR",
   subfo_3 =	"7c000450RRR.",
   subfo_3 =	"7c000450RRR.",
   subo_3 =	"7c000450RRR~.",
   subo_3 =	"7c000450RRR~.",
   lfsux_3 =	"7c00046eFR0R",
   lfsux_3 =	"7c00046eFR0R",
+  lwdx_3 =	"7c000486RRR",
   lswi_3 =	"7c0004aaRR0A",
   lswi_3 =	"7c0004aaRR0A",
   sync_0 =	"7c0004ac",
   sync_0 =	"7c0004ac",
   lwsync_0 =	"7c2004ac",
   lwsync_0 =	"7c2004ac",
   ptesync_0 =	"7c4004ac",
   ptesync_0 =	"7c4004ac",
   lfdx_3 =	"7c0004aeFR0R",
   lfdx_3 =	"7c0004aeFR0R",
+  lddx_3 =	"7c0004c6RRR",
   nego_2 =	"7c0004d0RR.",
   nego_2 =	"7c0004d0RR.",
   lfdux_3 =	"7c0004eeFR0R",
   lfdux_3 =	"7c0004eeFR0R",
+  stbdx_3 =	"7c000506RRR",
   subfeo_3 =	"7c000510RRR.",
   subfeo_3 =	"7c000510RRR.",
   subeo_3 =	"7c000510RRR~.",
   subeo_3 =	"7c000510RRR~.",
   addeo_3 =	"7c000514RRR.",
   addeo_3 =	"7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
   stswx_3 =	"7c00052aRR0R",
   stswx_3 =	"7c00052aRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stfsx_3 =	"7c00052eFR0R",
   stfsx_3 =	"7c00052eFR0R",
+  sthdx_3 =	"7c000546RRR",
+  ["stbcx._3"] =	"7c00056dRRR",
   stfsux_3 =	"7c00056eFR0R",
   stfsux_3 =	"7c00056eFR0R",
+  stwdx_3 =	"7c000586RRR",
   subfzeo_2 =	"7c000590RR.",
   subfzeo_2 =	"7c000590RR.",
   addzeo_2 =	"7c000594RR.",
   addzeo_2 =	"7c000594RR.",
   stswi_3 =	"7c0005aaRR0A",
   stswi_3 =	"7c0005aaRR0A",
+  ["sthcx._3"] =	"7c0005adRRR",
   stfdx_3 =	"7c0005aeFR0R",
   stfdx_3 =	"7c0005aeFR0R",
+  stddx_3 =	"7c0005c6RRR",
   subfmeo_2 =	"7c0005d0RR.",
   subfmeo_2 =	"7c0005d0RR.",
   mulldo_3 =	"7c0005d2RRR.",
   mulldo_3 =	"7c0005d2RRR.",
   addmeo_2 =	"7c0005d4RR.",
   addmeo_2 =	"7c0005d4RR.",
   mullwo_3 =	"7c0005d6RRR.",
   mullwo_3 =	"7c0005d6RRR.",
   dcba_2 =	"7c0005ec-RR",
   dcba_2 =	"7c0005ec-RR",
   stfdux_3 =	"7c0005eeFR0R",
   stfdux_3 =	"7c0005eeFR0R",
+  stvepxl_3 =	"7c00060eVRR",
   addo_3 =	"7c000614RRR.",
   addo_3 =	"7c000614RRR.",
   lhbrx_3 =	"7c00062cRR0R",
   lhbrx_3 =	"7c00062cRR0R",
+  lfdpx_3 =	"7c00062eF:RR",
   sraw_3 =	"7c000630RR~R.",
   sraw_3 =	"7c000630RR~R.",
   srad_3 =	"7c000634RR~R.",
   srad_3 =	"7c000634RR~R.",
+  lfddx_3 =	"7c000646FRR",
+  stvepx_3 =	"7c00064eVRR",
   srawi_3 =	"7c000670RR~A.",
   srawi_3 =	"7c000670RR~A.",
   sradi_3 =	"7c000674RR~H.",
   sradi_3 =	"7c000674RR~H.",
   eieio_0 =	"7c0006ac",
   eieio_0 =	"7c0006ac",
   lfiwax_3 =	"7c0006aeFR0R",
   lfiwax_3 =	"7c0006aeFR0R",
+  divdeuo_3 =	"7c000712RRR.",
+  divweuo_3 =	"7c000716RRR.",
   sthbrx_3 =	"7c00072cRR0R",
   sthbrx_3 =	"7c00072cRR0R",
+  stfdpx_3 =	"7c00072eF:RR",
   extsh_2 =	"7c000734RR~.",
   extsh_2 =	"7c000734RR~.",
+  stfddx_3 =	"7c000746FRR",
+  divdeo_3 =	"7c000752RRR.",
+  divweo_3 =	"7c000756RRR.",
   extsb_2 =	"7c000774RR~.",
   extsb_2 =	"7c000774RR~.",
   divduo_3 =	"7c000792RRR.",
   divduo_3 =	"7c000792RRR.",
   divwou_3 =	"7c000796RRR.",
   divwou_3 =	"7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
   divwo_3 =	"7c0007d6RRR.",
   divwo_3 =	"7c0007d6RRR.",
   dcbz_2 =	"7c0007ec-RR",
   dcbz_2 =	"7c0007ec-RR",
 
 
+  ["tbegin._1"] =	"7c00051d1",
+  ["tbegin._0"] =	"7c00051d",
+  ["tend._1"] =		"7c00055dY",
+  ["tend._0"] =		"7c00055d",
+  ["tendall._0"] =	"7e00055d",
+  tcheck_1 =		"7c00059cX",
+  ["tsr._1"] =		"7c0005dd1",
+  ["tsuspend._0"] =	"7c0005dd",
+  ["tresume._0"] =	"7c2005dd",
+  ["tabortwc._3"] =	"7c00061dARR",
+  ["tabortdc._3"] =	"7c00065dARR",
+  ["tabortwci._3"] =	"7c00069dARS",
+  ["tabortdci._3"] =	"7c0006ddARS",
+  ["tabort._1"] =	"7c00071d-R-",
+  ["treclaim._1"] =	"7c00075d-R",
+  ["trechkpt._0"] =	"7c0007dd",
+
+  lxsiwzx_3 =	"7c000018QRR",
+  lxsiwax_3 =	"7c000098QRR",
+  mfvsrd_2 =	"7c000066-Rq",
+  mfvsrwz_2 =	"7c0000e6-Rq",
+  stxsiwx_3 =	"7c000118QRR",
+  mtvsrd_2 =	"7c000166QR",
+  mtvsrwa_2 =	"7c0001a6QR",
+  lxvdsx_3 =	"7c000298QRR",
+  lxsspx_3 =	"7c000418QRR",
+  lxsdx_3 =	"7c000498QRR",
+  stxsspx_3 =	"7c000518QRR",
+  stxsdx_3 =	"7c000598QRR",
+  lxvw4x_3 =	"7c000618QRR",
+  lxvd2x_3 =	"7c000698QRR",
+  stxvw4x_3 =	"7c000718QRR",
+  stxvd2x_3 =	"7c000798QRR",
+
   -- Primary opcode 30:
   -- Primary opcode 30:
   rldicl_4 =	"78000000RR~HM.",
   rldicl_4 =	"78000000RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
   rldcl_4 =	"78000010RR~RM.",
   rldcl_4 =	"78000010RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
 
 
+  rotldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = "0"
+  end),
+  rotrdi_3 =	op_alias("rldicl_4", function(p)
+    p[3] = "64-("..p[3]..")"; p[4] = "0"
+  end),
+  rotld_3 =	op_alias("rldcl_4", function(p)
+    p[4] = "0"
+  end),
+  sldi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"
+  end),
+  srdi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "64-("..p[3]..")"
+  end),
+  clrldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "0"
+  end),
+  clrrdi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"; p[3] = "0"
+  end),
+
+  -- Primary opcode 56:
+  lq_2 =	"e0000000R:D", -- NYI: displacement must be divisible by 8.
+
+  -- Primary opcode 57:
+  lfdp_2 =	"e4000000F:D", -- NYI: displacement must be divisible by 4.
+
   -- Primary opcode 59:
   -- Primary opcode 59:
   fdivs_3 =	"ec000024FFF.",
   fdivs_3 =	"ec000024FFF.",
   fsubs_3 =	"ec000028FFF.",
   fsubs_3 =	"ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
   fmadds_4 =	"ec00003aFFFF~.",
   fmadds_4 =	"ec00003aFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
+  fcfids_2 =	"ec00069cF-F.",
+  fcfidus_2 =	"ec00079cF-F.",
+
+  dadd_3 =	"ec000004FFF.",
+  dqua_4 =	"ec000006FFFZ.",
+  dmul_3 =	"ec000044FFF.",
+  drrnd_4 =	"ec000046FFFZ.",
+  dscli_3 =	"ec000084FF6.",
+  dquai_4 =	"ec000086SF~FZ.",
+  dscri_3 =	"ec0000c4FF6.",
+  drintx_4 =	"ec0000c61F~FZ.",
+  dcmpo_3 =	"ec000104XFF",
+  dtstex_3 =	"ec000144XFF",
+  dtstdc_3 =	"ec000184XF6",
+  dtstdg_3 =	"ec0001c4XF6",
+  drintn_4 =	"ec0001c61F~FZ.",
+  dctdp_2 =	"ec000204F-F.",
+  dctfix_2 =	"ec000244F-F.",
+  ddedpd_3 =	"ec000284ZF~F.",
+  dxex_2 =	"ec0002c4F-F.",
+  dsub_3 =	"ec000404FFF.",
+  ddiv_3 =	"ec000444FFF.",
+  dcmpu_3 =	"ec000504XFF",
+  dtstsf_3 =	"ec000544XFF",
+  drsp_2 =	"ec000604F-F.",
+  dcffix_2 =	"ec000644F-F.",
+  denbcd_3 =	"ec000684YF~F.",
+  diex_3 =	"ec0006c4FFF.",
+
+  -- Primary opcode 60:
+  xsaddsp_3 =		"f0000000QQQ",
+  xsmaddasp_3 =		"f0000008QQQ",
+  xxsldwi_4 =		"f0000010QQQz",
+  xsrsqrtesp_2 =	"f0000028Q-Q",
+  xssqrtsp_2 =		"f000002cQ-Q",
+  xxsel_4 =		"f0000030QQQQ",
+  xssubsp_3 =		"f0000040QQQ",
+  xsmaddmsp_3 =		"f0000048QQQ",
+  xxpermdi_4 =		"f0000050QQQz",
+  xsresp_2 =		"f0000068Q-Q",
+  xsmulsp_3 =		"f0000080QQQ",
+  xsmsubasp_3 =		"f0000088QQQ",
+  xxmrghw_3 =		"f0000090QQQ",
+  xsdivsp_3 =		"f00000c0QQQ",
+  xsmsubmsp_3 =		"f00000c8QQQ",
+  xsadddp_3 =		"f0000100QQQ",
+  xsmaddadp_3 =		"f0000108QQQ",
+  xscmpudp_3 =		"f0000118XQQ",
+  xscvdpuxws_2 =	"f0000120Q-Q",
+  xsrdpi_2 =		"f0000124Q-Q",
+  xsrsqrtedp_2 =	"f0000128Q-Q",
+  xssqrtdp_2 =		"f000012cQ-Q",
+  xssubdp_3 =		"f0000140QQQ",
+  xsmaddmdp_3 =		"f0000148QQQ",
+  xscmpodp_3 =		"f0000158XQQ",
+  xscvdpsxws_2 =	"f0000160Q-Q",
+  xsrdpiz_2 =		"f0000164Q-Q",
+  xsredp_2 =		"f0000168Q-Q",
+  xsmuldp_3 =		"f0000180QQQ",
+  xsmsubadp_3 =		"f0000188QQQ",
+  xxmrglw_3 =		"f0000190QQQ",
+  xsrdpip_2 =		"f00001a4Q-Q",
+  xstsqrtdp_2 =		"f00001a8X-Q",
+  xsrdpic_2 =		"f00001acQ-Q",
+  xsdivdp_3 =		"f00001c0QQQ",
+  xsmsubmdp_3 =		"f00001c8QQQ",
+  xsrdpim_2 =		"f00001e4Q-Q",
+  xstdivdp_3 =		"f00001e8XQQ",
+  xvaddsp_3 =		"f0000200QQQ",
+  xvmaddasp_3 =		"f0000208QQQ",
+  xvcmpeqsp_3 =		"f0000218QQQ",
+  xvcvspuxws_2 =	"f0000220Q-Q",
+  xvrspi_2 =		"f0000224Q-Q",
+  xvrsqrtesp_2 =	"f0000228Q-Q",
+  xvsqrtsp_2 =		"f000022cQ-Q",
+  xvsubsp_3 =		"f0000240QQQ",
+  xvmaddmsp_3 =		"f0000248QQQ",
+  xvcmpgtsp_3 =		"f0000258QQQ",
+  xvcvspsxws_2 =	"f0000260Q-Q",
+  xvrspiz_2 =		"f0000264Q-Q",
+  xvresp_2 =		"f0000268Q-Q",
+  xvmulsp_3 =		"f0000280QQQ",
+  xvmsubasp_3 =		"f0000288QQQ",
+  xxspltw_3 =		"f0000290QQg~",
+  xvcmpgesp_3 =		"f0000298QQQ",
+  xvcvuxwsp_2 =		"f00002a0Q-Q",
+  xvrspip_2 =		"f00002a4Q-Q",
+  xvtsqrtsp_2 =		"f00002a8X-Q",
+  xvrspic_2 =		"f00002acQ-Q",
+  xvdivsp_3 =		"f00002c0QQQ",
+  xvmsubmsp_3 =		"f00002c8QQQ",
+  xvcvsxwsp_2 =		"f00002e0Q-Q",
+  xvrspim_2 =		"f00002e4Q-Q",
+  xvtdivsp_3 =		"f00002e8XQQ",
+  xvadddp_3 =		"f0000300QQQ",
+  xvmaddadp_3 =		"f0000308QQQ",
+  xvcmpeqdp_3 =		"f0000318QQQ",
+  xvcvdpuxws_2 =	"f0000320Q-Q",
+  xvrdpi_2 =		"f0000324Q-Q",
+  xvrsqrtedp_2 =	"f0000328Q-Q",
+  xvsqrtdp_2 =		"f000032cQ-Q",
+  xvsubdp_3 =		"f0000340QQQ",
+  xvmaddmdp_3 =		"f0000348QQQ",
+  xvcmpgtdp_3 =		"f0000358QQQ",
+  xvcvdpsxws_2 =	"f0000360Q-Q",
+  xvrdpiz_2 =		"f0000364Q-Q",
+  xvredp_2 =		"f0000368Q-Q",
+  xvmuldp_3 =		"f0000380QQQ",
+  xvmsubadp_3 =		"f0000388QQQ",
+  xvcmpgedp_3 =		"f0000398QQQ",
+  xvcvuxwdp_2 =		"f00003a0Q-Q",
+  xvrdpip_2 =		"f00003a4Q-Q",
+  xvtsqrtdp_2 =		"f00003a8X-Q",
+  xvrdpic_2 =		"f00003acQ-Q",
+  xvdivdp_3 =		"f00003c0QQQ",
+  xvmsubmdp_3 =		"f00003c8QQQ",
+  xvcvsxwdp_2 =		"f00003e0Q-Q",
+  xvrdpim_2 =		"f00003e4Q-Q",
+  xvtdivdp_3 =		"f00003e8XQQ",
+  xsnmaddasp_3 =	"f0000408QQQ",
+  xxland_3 =		"f0000410QQQ",
+  xscvdpsp_2 =		"f0000424Q-Q",
+  xscvdpspn_2 =		"f000042cQ-Q",
+  xsnmaddmsp_3 =	"f0000448QQQ",
+  xxlandc_3 =		"f0000450QQQ",
+  xsrsp_2 =		"f0000464Q-Q",
+  xsnmsubasp_3 =	"f0000488QQQ",
+  xxlor_3 =		"f0000490QQQ",
+  xscvuxdsp_2 =		"f00004a0Q-Q",
+  xsnmsubmsp_3 =	"f00004c8QQQ",
+  xxlxor_3 =		"f00004d0QQQ",
+  xscvsxdsp_2 =		"f00004e0Q-Q",
+  xsmaxdp_3 =		"f0000500QQQ",
+  xsnmaddadp_3 =	"f0000508QQQ",
+  xxlnor_3 =		"f0000510QQQ",
+  xscvdpuxds_2 =	"f0000520Q-Q",
+  xscvspdp_2 =		"f0000524Q-Q",
+  xscvspdpn_2 =		"f000052cQ-Q",
+  xsmindp_3 =		"f0000540QQQ",
+  xsnmaddmdp_3 =	"f0000548QQQ",
+  xxlorc_3 =		"f0000550QQQ",
+  xscvdpsxds_2 =	"f0000560Q-Q",
+  xsabsdp_2 =		"f0000564Q-Q",
+  xscpsgndp_3 =		"f0000580QQQ",
+  xsnmsubadp_3 =	"f0000588QQQ",
+  xxlnand_3 =		"f0000590QQQ",
+  xscvuxddp_2 =		"f00005a0Q-Q",
+  xsnabsdp_2 =		"f00005a4Q-Q",
+  xsnmsubmdp_3 =	"f00005c8QQQ",
+  xxleqv_3 =		"f00005d0QQQ",
+  xscvsxddp_2 =		"f00005e0Q-Q",
+  xsnegdp_2 =		"f00005e4Q-Q",
+  xvmaxsp_3 =		"f0000600QQQ",
+  xvnmaddasp_3 =	"f0000608QQQ",
+  ["xvcmpeqsp._3"] =	"f0000618QQQ",
+  xvcvspuxds_2 =	"f0000620Q-Q",
+  xvcvdpsp_2 =		"f0000624Q-Q",
+  xvminsp_3 =		"f0000640QQQ",
+  xvnmaddmsp_3 =	"f0000648QQQ",
+  ["xvcmpgtsp._3"] =	"f0000658QQQ",
+  xvcvspsxds_2 =	"f0000660Q-Q",
+  xvabssp_2 =		"f0000664Q-Q",
+  xvcpsgnsp_3 =		"f0000680QQQ",
+  xvnmsubasp_3 =	"f0000688QQQ",
+  ["xvcmpgesp._3"] =	"f0000698QQQ",
+  xvcvuxdsp_2 =		"f00006a0Q-Q",
+  xvnabssp_2 =		"f00006a4Q-Q",
+  xvnmsubmsp_3 =	"f00006c8QQQ",
+  xvcvsxdsp_2 =		"f00006e0Q-Q",
+  xvnegsp_2 =		"f00006e4Q-Q",
+  xvmaxdp_3 =		"f0000700QQQ",
+  xvnmaddadp_3 =	"f0000708QQQ",
+  ["xvcmpeqdp._3"] =	"f0000718QQQ",
+  xvcvdpuxds_2 =	"f0000720Q-Q",
+  xvcvspdp_2 =		"f0000724Q-Q",
+  xvmindp_3 =		"f0000740QQQ",
+  xvnmaddmdp_3 =	"f0000748QQQ",
+  ["xvcmpgtdp._3"] =	"f0000758QQQ",
+  xvcvdpsxds_2 =	"f0000760Q-Q",
+  xvabsdp_2 =		"f0000764Q-Q",
+  xvcpsgndp_3 =		"f0000780QQQ",
+  xvnmsubadp_3 =	"f0000788QQQ",
+  ["xvcmpgedp._3"] =	"f0000798QQQ",
+  xvcvuxddp_2 =		"f00007a0Q-Q",
+  xvnabsdp_2 =		"f00007a4Q-Q",
+  xvnmsubmdp_3 =	"f00007c8QQQ",
+  xvcvsxddp_2 =		"f00007e0Q-Q",
+  xvnegdp_2 =		"f00007e4Q-Q",
+
+  -- Primary opcode 61:
+  stfdp_2 =	"f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+  -- Primary opcode 62:
+  stq_2 =	"f8000002R:D", -- NYI: displacement must be divisible by 8.
 
 
   -- Primary opcode 63:
   -- Primary opcode 63:
   fdiv_3 =	"fc000024FFF.",
   fdiv_3 =	"fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
   frsp_2 =	"fc000018F-F.",
   frsp_2 =	"fc000018F-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiwz_2 =	"fc00001eF-F.",
   fctiwz_2 =	"fc00001eF-F.",
+  ftdiv_2 =	"fc000100X-F.",
+  fctiwu_2 =	"fc00011cF-F.",
+  fctiwuz_2 =	"fc00011eF-F.",
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   fnabs_2 =	"fc000110F-F.",
   fnabs_2 =	"fc000110F-F.",
+  ftsqrt_2 =	"fc000140X-F.",
   fabs_2 =	"fc000210F-F.",
   fabs_2 =	"fc000210F-F.",
   frin_2 =	"fc000310F-F.",
   frin_2 =	"fc000310F-F.",
   friz_2 =	"fc000350F-F.",
   friz_2 =	"fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   fctid_2 =	"fc00065cF-F.",
   fctid_2 =	"fc00065cF-F.",
   fctidz_2 =	"fc00065eF-F.",
   fctidz_2 =	"fc00065eF-F.",
+  fmrgow_3 =	"fc00068cFFF",
   fcfid_2 =	"fc00069cF-F.",
   fcfid_2 =	"fc00069cF-F.",
+  fctidu_2 =	"fc00075cF-F.",
+  fctiduz_2 =	"fc00075eF-F.",
+  fmrgew_3 =	"fc00078cFFF",
+  fcfidu_2 =	"fc00079cF-F.",
+
+  daddq_3 =	"fc000004F:F:F:.",
+  dquaq_4 =	"fc000006F:F:F:Z.",
+  dmulq_3 =	"fc000044F:F:F:.",
+  drrndq_4 =	"fc000046F:F:F:Z.",
+  dscliq_3 =	"fc000084F:F:6.",
+  dquaiq_4 =	"fc000086SF:~F:Z.",
+  dscriq_3 =	"fc0000c4F:F:6.",
+  drintxq_4 =	"fc0000c61F:~F:Z.",
+  dcmpoq_3 =	"fc000104XF:F:",
+  dtstexq_3 =	"fc000144XF:F:",
+  dtstdcq_3 =	"fc000184XF:6",
+  dtstdgq_3 =	"fc0001c4XF:6",
+  drintnq_4 =	"fc0001c61F:~F:Z.",
+  dctqpq_2 =	"fc000204F:-F:.",
+  dctfixq_2 =	"fc000244F:-F:.",
+  ddedpdq_3 =	"fc000284ZF:~F:.",
+  dxexq_2 =	"fc0002c4F:-F:.",
+  dsubq_3 =	"fc000404F:F:F:.",
+  ddivq_3 =	"fc000444F:F:F:.",
+  dcmpuq_3 =	"fc000504XF:F:",
+  dtstsfq_3 =	"fc000544XF:F:",
+  drdpq_2 =	"fc000604F:-F:.",
+  dcffixq_2 =	"fc000644F:-F:.",
+  denbcdq_3 =	"fc000684YF:~F:.",
+  diexq_3 =	"fc0006c4F:FF:.",
 
 
   -- Primary opcode 4, SPE APU extension:
   -- Primary opcode 4, SPE APU extension:
   evaddw_3 =		"10000200RRR",
   evaddw_3 =		"10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
 do
 do
   local t = {}
   local t = {}
   for k,v in pairs(map_op) do
   for k,v in pairs(map_op) do
-    if sub(v, -1) == "." then
+    if type(v) == "string" and sub(v, -1) == "." then
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
     end
     end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
   werror("bad register name `"..expr.."'")
 end
 end
 
 
+-- Parse a register name of the form "vN" and return N as a number (0..31).
+-- Any other input is reported via werror().
+local function parse_vr(expr)
+  local digits = match(expr, "^v([1-3]?[0-9])$")
+  local num = digits and tonumber(digits)
+  if num and num <= 31 then
+    return num
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a register name of the form "vsN" and return N as a number (0..63).
+-- Any other input is reported via werror().
+local function parse_vs(expr)
+  local digits = match(expr, "^vs([1-6]?[0-9])$")
+  local num = digits and tonumber(digits)
+  if num and num <= 63 then
+    return num
+  end
+  werror("bad register name `"..expr.."'")
+end
+
 local function parse_cr(expr)
 local function parse_cr(expr)
   local r = match(expr, "^cr([0-7])$")
   local r = match(expr, "^cr([0-7])$")
   if r then return tonumber(r) end
   if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
   werror("bad condition bit name `"..expr.."'")
   werror("bad condition bit name `"..expr.."'")
 end
 end
 
 
+-- Environment table handed to every chunk compiled by loadenv(); it is
+-- created empty here.
+local parse_ctx = {}
+
+-- Compile the source string `s` with parse_ctx as its environment.
+-- When setfenv exists, the chunk is compiled with loadstring() and its
+-- environment is set afterwards; otherwise the four-argument form of load()
+-- is used. The result is the compiled function, or nil if `s` does not
+-- compile (the load() variant also passes load's error message through).
+local loadenv = setfenv and function(s)
+  local code = loadstring(s, "")
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s)
+  return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+-- Returns the numeric value of `n`, or nil if `n` is neither a plain number
+-- nor an expression that evaluates to a number when run through loadenv().
+local function parse_number(n)
+  local x = tonumber(n)
+  if x then return x end
+  local code = loadenv("return "..n)
+  if code then
+    local ok, y = pcall(code)
+    -- Only accept numeric results: an expression such as `1 == 1` or a string
+    -- literal evaluates successfully, but the callers feed the result to bit
+    -- operations, so anything else must be treated as "not a number".
+    if ok and type(y) == "number" then return y end
+  end
+  return nil
+end
+
 local function parse_imm(imm, bits, shift, scale, signed)
 local function parse_imm(imm, bits, shift, scale, signed)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
   if n then
     local m = sar(n, scale)
     local m = sar(n, scale)
     if shl(m, scale) == n then
     if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
       end
       end
     end
     end
     werror("out of range immediate `"..imm.."'")
     werror("out of range immediate `"..imm.."'")
-  elseif match(imm, "^r([1-3]?[0-9])$") or
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+	 match(imm, "^vs([1-6]?[0-9])$") or
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
 end
 end
 
 
 local function parse_shiftmask(imm, isshift)
 local function parse_shiftmask(imm, isshift)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
   if n then
     if shr(n, 6) == 0 then
     if shr(n, 6) == 0 then
-      local lsb = band(imm, 31)
-      local msb = imm - lsb
+      local lsb = band(n, 31)
+      local msb = n - lsb
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
     end
     end
     werror("out of range immediate `"..imm.."'")
     werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
     werror("expected immediate operand, got register")
   else
   else
-    werror("NYI: parameterized 64 bit shift/mask")
+    waction("IMMSH", isshift and 1 or 0, imm)
+    return 0;
   end
   end
 end
 end
 
 
@@ -1011,7 +1648,7 @@ end
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Handle opcodes defined with template strings.
 -- Handle opcodes defined with template strings.
-map_op[".template__"] = function(params, template, nparams)
+op_template = function(params, template, nparams)
   if not params then return sub(template, 9) end
   if not params then return sub(template, 9) end
   local op = tonumber(sub(template, 1, 8), 16)
   local op = tonumber(sub(template, 1, 8), 16)
   local n, rs = 1, 26
   local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
     elseif p == "F" then
     elseif p == "F" then
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+    elseif p == "V" then
+      rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+    elseif p == "Q" then
+      local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+      local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+      op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+    elseif p == "q" then
+      local vs = parse_vs(params[n]); n = n + 1
+      op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
     elseif p == "A" then
     elseif p == "A" then
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
     elseif p == "S" then
     elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
     elseif p == "X" then
     elseif p == "X" then
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+    elseif p == "1" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+    elseif p == "g" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+    elseif p == "3" then
+      rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+    elseif p == "P" then
+      rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "p" then
+      op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "6" then
+      rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+    elseif p == "Y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+    elseif p == "y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+    elseif p == "Z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+    elseif p == "z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
     elseif p == "W" then
     elseif p == "W" then
       op = op + parse_cr(params[n]); n = n + 1
       op = op + parse_cr(params[n]); n = n + 1
     elseif p == "G" then
     elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
       local lo = band(op, mm)
       local lo = band(op, mm)
       local hi = band(op, shl(mm, 5))
       local hi = band(op, shl(mm, 5))
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+    elseif p == ":" then
+      if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
     elseif p == "-" then
     elseif p == "-" then
       rs = rs - 5
       rs = rs - 5
     elseif p == "." then
     elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
   wputpos(pos, op)
   wputpos(pos, op)
 end
 end
 
 
+map_op[".template__"] = op_template
+
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Pseudo-opcode to mark the position where the action list is to be emitted.
 -- Pseudo-opcode to mark the position where the action list is to be emitted.

+ 3 - 3
Source/ThirdParty/LuaJIT/dynasm/dasm_proto.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM encoding engine prototypes.
 ** DynASM encoding engine prototypes.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -10,8 +10,8 @@
 #include <stddef.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <stdarg.h>
 
 
-#define DASM_IDENT	"DynASM 1.3.0"
-#define DASM_VERSION	10300	/* 1.3.0 */
+#define DASM_IDENT	"DynASM 1.4.0"
+#define DASM_VERSION	10400	/* 1.4.0 */
 
 
 #ifndef Dst_DECL
 #ifndef Dst_DECL
 #define Dst_DECL	dasm_State **Dst
 #define Dst_DECL	dasm_State **Dst

+ 1 - 1
Source/ThirdParty/LuaJIT/dynasm/dasm_x64.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM x64 module.
 -- DynASM x64 module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- This module just sets 64 bit mode for the combined x86/x64 module.
 -- This module just sets 64 bit mode for the combined x86/x64 module.

+ 34 - 7
Source/ThirdParty/LuaJIT/dynasm/dasm_x86.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** DynASM x86 encoding engine.
 ** DynASM x86 encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 */
 
 
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
   dasm_State *D = Dst_REF;
   dasm_State *D = Dst_REF;
   dasm_ActList p = D->actionlist + start;
   dasm_ActList p = D->actionlist + start;
   dasm_Section *sec = D->section;
   dasm_Section *sec = D->section;
-  int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+  int pos = sec->pos, ofs = sec->ofs, mrm = -1;
   int *b;
   int *b;
 
 
   if (pos >= sec->epos) {
   if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
       b[pos++] = n;
       b[pos++] = n;
       switch (action) {
       switch (action) {
       case DASM_DISP:
       case DASM_DISP:
-	if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+	if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
       case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
       case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
       case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
       case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
       case DASM_IMM_D: ofs += 4; break;
       case DASM_IMM_D: ofs += 4; break;
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
       case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
       case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
       case DASM_SPACE: p++; ofs += n; break;
       case DASM_SPACE: p++; ofs += n; break;
       case DASM_SETLABEL: b[pos-2] = -0x40000000; break;  /* Neg. label ofs. */
       case DASM_SETLABEL: b[pos-2] = -0x40000000; break;  /* Neg. label ofs. */
-      case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
-	if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+      case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
+	if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+	if (*p < 0x20 && (n&7) == 4) ofs++;
+	switch ((*p++ >> 3) & 3) {
+	case 3: n |= b[pos-3];
+	case 2: n |= b[pos-2];
+	case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+	}
+	continue;
       }
       }
-      mrm = 4;
+      mrm = -1;
     } else {
     } else {
       int *pl, n;
       int *pl, n;
       switch (action) {
       switch (action) {
@@ -391,7 +398,27 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_D: wd: dasmd(n); break;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
 	case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
 	case DASM_IMM_W: dasmw(n); break;
 	case DASM_IMM_W: dasmw(n); break;
-	case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+	case DASM_VREG: {
+	  int t = *p++;
+	  unsigned char *ex = cp - (t&7);
+	  if ((n & 8) && t < 0xa0) {
+	    if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
+	    n &= 7;
+	  } else if (n & 0x10) {
+	    if (*ex & 0x80) {
+	      *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
+	    }
+	    while (++ex < cp) ex[-1] = *ex;
+	    if (mark) mark--;
+	    cp--;
+	    n &= 7;
+	  }
+	  if (t >= 0xc0) n <<= 4;
+	  else if (t >= 0x40) n <<= 3;
+	  else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
+	  cp[-1] ^= n;
+	  break;
+	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
 	  b++; n = (int)(ptrdiff_t)D->globals[-n];
 	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
 	case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */

+ 433 - 100
Source/ThirdParty/LuaJIT/dynasm/dasm_x86.lua

@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 -- DynASM x86/x64 module.
 -- DynASM x86/x64 module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -11,9 +11,9 @@ local x64 = x64
 local _info = {
 local _info = {
   arch =	x64 and "x64" or "x86",
   arch =	x64 and "x64" or "x86",
   description =	"DynASM x86/x64 module",
   description =	"DynASM x86/x64 module",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   license =	"MIT",
   license =	"MIT",
 }
 }
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
 local _s = string
 local _s = string
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
-local concat, sort = table.concat, table.sort
+local concat, sort, remove = table.concat, table.sort, table.remove
 local bit = bit or require("bit")
 local bit = bit or require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
 
 
 -- Inherited tables and callbacks.
 -- Inherited tables and callbacks.
 local g_opt, g_arch
 local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
   -- int arg, 1 buffer pos:
   -- int arg, 1 buffer pos:
   "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
   "DISP",  "IMM_S", "IMM_B", "IMM_W", "IMM_D",  "IMM_WB", "IMM_DB",
   -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
   -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
-  "VREG", "SPACE", -- !x64: VREG support NYI.
+  "VREG", "SPACE",
   -- ptrdiff_t arg, 1 buffer pos (address): !x64
   -- ptrdiff_t arg, 1 buffer pos (address): !x64
   "SETLABEL", "REL_A",
   "SETLABEL", "REL_A",
   -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
   -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
 -- Current number of section buffer positions for dasm_put().
 -- Current number of section buffer positions for dasm_put().
 local secpos = 1
 local secpos = 1
 
 
+-- VREG kind encodings, pre-shifted by 5 bits.
+-- Keyed by the name of the encoding field a variable register is patched
+-- into. wvreg() looks the kind up here and emits the value (combined with
+-- the shrink count and prefix size) as the byte following the VREG action.
+local map_vreg = {
+  ["modrm.rm.m"] = 0x00,
+  ["modrm.rm.r"] = 0x20,
+  ["opcode"] =     0x20,
+  ["sib.base"] =   0x20,
+  ["sib.index"] =  0x40,
+  ["modrm.reg"] =  0x80,
+  ["vex.v"] =      0xa0,
+  ["imm.hi"] =     0xc0,
+}
+
+-- Current number of VREG actions contributing to REX/VEX shrinkage.
+-- Incremented by wvreg() and reset there once a non-deferred action
+-- folds the count into its byte.
+local vreg_shrink_count = 0
+
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Compute action numbers for action names.
 -- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
   if a or num then secpos = secpos + (num or 1) end
   if a or num then secpos = secpos + (num or 1) end
 end
 end
 
 
+-- Optionally add a VREG action.
+--   kind:  key into map_vreg naming the field being patched.
+--   vreg:  variable register expression; nothing is emitted when it is nil.
+--   psz:   value added to the emitted kind byte (defaults to 0).
+--   sk:    kinds whose encoding is below this value count towards
+--          vreg_shrink_count (defaults to 0, i.e. none).
+--   defer: when true, keep accumulating the shrink count instead of folding
+--          it into this action's byte and resetting it.
+local function wvreg(kind, vreg, psz, sk, defer)
+  if not vreg then return end
+  waction("VREG", vreg)
+  -- Report the offending kind (not the register expression) on a bad lookup.
+  local b = assert(map_vreg[kind], "bad vreg kind `"..kind.."'")
+  if b < (sk or 0) then
+    vreg_shrink_count = vreg_shrink_count + 1
+  end
+  if not defer then
+    b = b + vreg_shrink_count * 8
+    vreg_shrink_count = 0
+  end
+  wputxb(b + (psz or 0))
+end
+
 -- Add call to embedded DynASM C code.
 -- Add call to embedded DynASM C code.
 local function wcall(func, args)
 local function wcall(func, args)
   wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
   wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
     if needrex then map_reg_needrex[iname] = true end
     if needrex then map_reg_needrex[iname] = true end
     local name
     local name
-    if sz == "o" then name = format("xmm%d", i)
+    if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
     elseif sz == "f" then name = format("st%d", i)
     elseif sz == "f" then name = format("st%d", i)
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     else name = format("r%d%s", i, sz == addrsize and "" or sz) end
     map_archdef[name] = iname
     map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
 map_reg_valid_index[map_archdef.esp] = false
 map_reg_valid_index[map_archdef.esp] = false
 if x64 then map_reg_valid_index[map_archdef.rsp] = false end
 if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+if x64 then map_reg_needrex[map_archdef.Rb] = true end
 map_archdef["Ra"] = "@"..addrsize
 map_archdef["Ra"] = "@"..addrsize
 
 
 -- FP registers (internally tword sized, but use "f" as operand size).
 -- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
 -- SSE registers (oword sized, but qword and dword accessible).
 -- SSE registers (oword sized, but qword and dword accessible).
 mkrmap("o", "xmm")
 mkrmap("o", "xmm")
 
 
+-- AVX registers (yword sized, but oword, qword and dword accessible).
+mkrmap("y", "ymm")
+
 -- Operand size prefixes to codes.
 -- Operand size prefixes to codes.
 local map_opsize = {
 local map_opsize = {
-  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
-  aword = addrsize,
+  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
+  tword = "t", aword = addrsize,
 }
 }
 
 
 -- Operand size code to number.
 -- Operand size code to number.
 local map_opsizenum = {
 local map_opsizenum = {
-  b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
 }
 }
 
 
 -- Operand size code to name.
 -- Operand size code to name.
 local map_opsizename = {
 local map_opsizename = {
-  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
-  f = "fpword",
+  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
+  t = "tword", f = "fpword",
 }
 }
 
 
 -- Valid index register scale factors.
 -- Valid index register scale factors.
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
 end
 end
 
 
 -- Put multi-byte opcode with operand-size dependent modifications.
 -- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex)
+local function wputop(sz, op, rex, vex, vregr, vregxb)
+  local psz, sk = 0, nil
+  if vex then
+    local tail
+    if vex.m == 1 and band(rex, 11) == 0 then
+      if x64 and vregxb then
+	sk = map_vreg["modrm.reg"]
+      else
+	wputb(0xc5)
+      tail = shl(bxor(band(rex, 4), 4), 5)
+      psz = 3
+      end
+    end
+    if not tail then
+      wputb(0xc4)
+      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
+      tail = shl(band(rex, 8), 4)
+      psz = 4
+    end
+    local reg, vreg = 0, nil
+    if vex.v then
+      reg = vex.v.reg
+      if not reg then werror("bad vex operand") end
+      if reg < 0 then reg = 0; vreg = vex.v.vreg end
+    end
+    if sz == "y" or vex.l then tail = tail + 4 end
+    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
+    wvreg("vex.v", vreg)
+    rex = 0
+    if op >= 256 then werror("bad vex opcode") end
+  else
+    if rex ~= 0 then
+      if not x64 then werror("bad operand size") end
+    elseif (vregr or vregxb) and x64 then
+      rex = 0x10
+      sk = map_vreg["vex.v"]
+    end
+  end
   local r
   local r
-  if rex ~= 0 and not x64 then werror("bad operand size") end
   if sz == "w" then wputb(102) end
   if sz == "w" then wputb(102) end
   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
     if rex ~= 0 then
     if rex ~= 0 then
       local opc3 = band(op, 0xffff00)
       local opc3 = band(op, 0xffff00)
       if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
       if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
-	wputb(64 + band(rex, 15)); rex = 0
+	wputb(64 + band(rex, 15)); rex = 0; psz = 2
       end
       end
     end
     end
-    wputb(shr(op, 16)); op = band(op, 0xffff)
+    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
   end
   end
   if op >= 256 then
   if op >= 256 then
     local b = shr(op, 8)
     local b = shr(op, 8)
-    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
-    wputb(b)
-    op = band(op, 255)
+    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
+    wputb(b); op = band(op, 255); psz = psz + 1
   end
   end
-  if rex ~= 0 then wputb(64 + band(rex, 15)) end
+  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
   if sz == "b" then op = op - 1 end
   if sz == "b" then op = op - 1 end
   wputb(op)
   wputb(op)
+  return psz, sk
 end
 end
 
 
 -- Put ModRM or SIB formatted byte.
 -- Put ModRM or SIB formatted byte.
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
 end
 end
 
 
 -- Put ModRM/SIB plus optional displacement.
 -- Put ModRM/SIB plus optional displacement.
-local function wputmrmsib(t, imark, s, vsreg)
+local function wputmrmsib(t, imark, s, vsreg, psz, sk)
   local vreg, vxreg
   local vreg, vxreg
   local reg, xreg = t.reg, t.xreg
   local reg, xreg = t.reg, t.xreg
   if reg and reg < 0 then reg = 0; vreg = t.vreg end
   if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
   -- Register mode.
   -- Register mode.
   if sub(t.mode, 1, 1) == "r" then
   if sub(t.mode, 1, 1) == "r" then
     wputmodrm(3, s, reg)
     wputmodrm(3, s, reg)
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
-    if vreg then waction("VREG", vreg); wputxb(0) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+    wvreg("modrm.rm.r", vreg, psz+1, sk)
     return
     return
   end
   end
 
 
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
       -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
       -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
       wputmodrm(0, s, 4)
       wputmodrm(0, s, 4)
       if imark == "I" then waction("MARK") end
       if imark == "I" then waction("MARK") end
-      if vsreg then waction("VREG", vsreg); wputxb(2) end
+      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
       wputmodrm(t.xsc, xreg, 5)
       wputmodrm(t.xsc, xreg, 5)
-      if vxreg then waction("VREG", vxreg); wputxb(3) end
+      wvreg("sib.index", vxreg, psz+2, sk)
     else
     else
       -- Pure 32 bit displacement.
       -- Pure 32 bit displacement.
       if x64 and tdisp ~= "table" then
       if x64 and tdisp ~= "table" then
 	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
 	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+	wvreg("modrm.reg", vsreg, psz+1, sk)
 	if imark == "I" then waction("MARK") end
 	if imark == "I" then waction("MARK") end
 	wputmodrm(0, 4, 5)
 	wputmodrm(0, 4, 5)
       else
       else
 	riprel = x64
 	riprel = x64
 	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
 	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+	wvreg("modrm.reg", vsreg, psz+1, sk)
 	if imark == "I" then waction("MARK") end
 	if imark == "I" then waction("MARK") end
       end
       end
-      if vsreg then waction("VREG", vsreg); wputxb(2) end
     end
     end
     if riprel then -- Emit rip-relative displacement.
     if riprel then -- Emit rip-relative displacement.
       if match("UWSiI", imark) then
       if match("UWSiI", imark) then
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
   if xreg or band(reg, 7) == 4 then
   if xreg or band(reg, 7) == 4 then
     wputmodrm(m or 2, s, 4) -- ModRM.
     wputmodrm(m or 2, s, 4) -- ModRM.
     if m == nil or imark == "I" then waction("MARK") end
     if m == nil or imark == "I" then waction("MARK") end
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
     wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
     wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
-    if vxreg then waction("VREG", vxreg); wputxb(3) end
-    if vreg then waction("VREG", vreg); wputxb(1) end
+    wvreg("sib.index", vxreg, psz+2, sk, vreg)
+    wvreg("sib.base", vreg, psz+2, sk)
   else
   else
     wputmodrm(m or 2, s, reg) -- ModRM.
     wputmodrm(m or 2, s, reg) -- ModRM.
     if (imark == "I" and (m == 1 or m == 2)) or
     if (imark == "I" and (m == 1 or m == 2)) or
        (m == nil and (vsreg or vreg)) then waction("MARK") end
        (m == nil and (vsreg or vreg)) then waction("MARK") end
-    if vsreg then waction("VREG", vsreg); wputxb(2) end
-    if vreg then waction("VREG", vreg); wputxb(1) end
+    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+    wvreg("modrm.rm.m", vreg, psz+1, sk)
   end
   end
 
 
   -- Put displacement.
   -- Put displacement.
@@ -881,9 +952,15 @@ end
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --             The spare 3 bits are either filled with the last hex digit or
 --             The spare 3 bits are either filled with the last hex digit or
 --             the result from a previous "r"/"R". The opcode is restored.
 --             the result from a previous "r"/"R". The opcode is restored.
+--   "u"       Use VEX encoding, vvvv unused.
+--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
+--             removed from the list used by future characters).
+--   "L"       Force VEX.L
 --
 --
 -- All of the following characters force a flush of the opcode:
 -- All of the following characters force a flush of the opcode:
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+--   "s"       stores a 4 bit immediate from the last register operand,
+--             followed by 4 zero bits.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "S"       stores a signed 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "U"       stores an unsigned 8 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
 --   "W"       stores an unsigned 16 bit immediate from the last operand.
@@ -1081,7 +1158,11 @@ local map_op = {
   btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
   btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
   bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
   bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
 
 
+  shld_3 =	"mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
+  shrd_3 =	"mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
+
   rdtsc_0 =	"0F31", -- P1+
   rdtsc_0 =	"0F31", -- P1+
+  rdpmc_0 =	"0F33", -- P6+
   cpuid_0 =	"0FA2", -- P1+
   cpuid_0 =	"0FA2", -- P1+
 
 
   -- floating point ops
   -- floating point ops
@@ -1114,6 +1195,9 @@ local map_op = {
   fucompp_0 =	"DAE9",
   fucompp_0 =	"DAE9",
   fcompp_0 =	"DED9",
   fcompp_0 =	"DED9",
 
 
+  fldenv_1 =	"x.:D94m",
+  fnstenv_1 =	"x.:D96m",
+  fstenv_1 =	"x.:9BD96m",
   fldcw_1 =	"xw:nD95m",
   fldcw_1 =	"xw:nD95m",
   fstcw_1 =	"xw:n9BD97m",
   fstcw_1 =	"xw:n9BD97m",
   fnstcw_1 =	"xw:nD97m",
   fnstcw_1 =	"xw:nD97m",
@@ -1184,11 +1268,13 @@ local map_op = {
   cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
   cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
   cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
   cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
   cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
   cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
-  cvtss2si_2 =	"rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
+  cvtss2si_2 =	"rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
   cvttpd2dq_2 =	"rmo:660FE6rM",
   cvttpd2dq_2 =	"rmo:660FE6rM",
   cvttps2dq_2 =	"rmo:F30F5BrM",
   cvttps2dq_2 =	"rmo:F30F5BrM",
   cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
   cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
   cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
   cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
+  fxsave_1 =	"x.:0FAE0m",
+  fxrstor_1 =	"x.:0FAE1m",
   ldmxcsr_1 =	"xd:0FAE2m",
   ldmxcsr_1 =	"xd:0FAE2m",
   lfence_0 =	"0FAEE8",
   lfence_0 =	"0FAEE8",
   maskmovdqu_2 = "rro:660FF7rM",
   maskmovdqu_2 = "rro:660FF7rM",
@@ -1217,46 +1303,14 @@ local map_op = {
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   orpd_2 =	"rmo:660F56rM",
   orpd_2 =	"rmo:660F56rM",
   orps_2 =	"rmo:0F56rM",
   orps_2 =	"rmo:0F56rM",
-  packssdw_2 =	"rmo:660F6BrM",
-  packsswb_2 =	"rmo:660F63rM",
-  packuswb_2 =	"rmo:660F67rM",
-  paddb_2 =	"rmo:660FFCrM",
-  paddd_2 =	"rmo:660FFErM",
-  paddq_2 =	"rmo:660FD4rM",
-  paddsb_2 =	"rmo:660FECrM",
-  paddsw_2 =	"rmo:660FEDrM",
-  paddusb_2 =	"rmo:660FDCrM",
-  paddusw_2 =	"rmo:660FDDrM",
-  paddw_2 =	"rmo:660FFDrM",
-  pand_2 =	"rmo:660FDBrM",
-  pandn_2 =	"rmo:660FDFrM",
   pause_0 =	"F390",
   pause_0 =	"F390",
-  pavgb_2 =	"rmo:660FE0rM",
-  pavgw_2 =	"rmo:660FE3rM",
-  pcmpeqb_2 =	"rmo:660F74rM",
-  pcmpeqd_2 =	"rmo:660F76rM",
-  pcmpeqw_2 =	"rmo:660F75rM",
-  pcmpgtb_2 =	"rmo:660F64rM",
-  pcmpgtd_2 =	"rmo:660F66rM",
-  pcmpgtw_2 =	"rmo:660F65rM",
-  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
+  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
-  pmaddwd_2 =	"rmo:660FF5rM",
-  pmaxsw_2 =	"rmo:660FEErM",
-  pmaxub_2 =	"rmo:660FDErM",
-  pminsw_2 =	"rmo:660FEArM",
-  pminub_2 =	"rmo:660FDArM",
   pmovmskb_2 =	"rr/do:660FD7rM",
   pmovmskb_2 =	"rr/do:660FD7rM",
-  pmulhuw_2 =	"rmo:660FE4rM",
-  pmulhw_2 =	"rmo:660FE5rM",
-  pmullw_2 =	"rmo:660FD5rM",
-  pmuludq_2 =	"rmo:660FF4rM",
-  por_2 =	"rmo:660FEBrM",
   prefetchnta_1 = "xb:n0F180m",
   prefetchnta_1 = "xb:n0F180m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht0_1 = "xb:n0F181m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht1_1 = "xb:n0F182m",
   prefetcht2_1 = "xb:n0F183m",
   prefetcht2_1 = "xb:n0F183m",
-  psadbw_2 =	"rmo:660FF6rM",
   pshufd_3 =	"rmio:660F70rMU",
   pshufd_3 =	"rmio:660F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshufhw_3 =	"rmio:F30F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
   pshuflw_3 =	"rmio:F20F70rMU",
@@ -1270,23 +1324,6 @@ local map_op = {
   psrldq_2 =	"rio:660F733mU",
   psrldq_2 =	"rio:660F733mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
-  psubb_2 =	"rmo:660FF8rM",
-  psubd_2 =	"rmo:660FFArM",
-  psubq_2 =	"rmo:660FFBrM",
-  psubsb_2 =	"rmo:660FE8rM",
-  psubsw_2 =	"rmo:660FE9rM",
-  psubusb_2 =	"rmo:660FD8rM",
-  psubusw_2 =	"rmo:660FD9rM",
-  psubw_2 =	"rmo:660FF9rM",
-  punpckhbw_2 =	"rmo:660F68rM",
-  punpckhdq_2 =	"rmo:660F6ArM",
-  punpckhqdq_2 = "rmo:660F6DrM",
-  punpckhwd_2 =	"rmo:660F69rM",
-  punpcklbw_2 =	"rmo:660F60rM",
-  punpckldq_2 =	"rmo:660F62rM",
-  punpcklqdq_2 = "rmo:660F6CrM",
-  punpcklwd_2 =	"rmo:660F61rM",
-  pxor_2 =	"rmo:660FEFrM",
   rcpps_2 =	"rmo:0F53rM",
   rcpps_2 =	"rmo:0F53rM",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rcpss_2 =	"rro:F30F53rM|rx/od:",
   rsqrtps_2 =	"rmo:0F52rM",
   rsqrtps_2 =	"rmo:0F52rM",
@@ -1344,7 +1381,7 @@ local map_op = {
   dpps_3 =	"rmio:660F3A40rMU",
   dpps_3 =	"rmio:660F3A40rMU",
   extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
   extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
   insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
   insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
-  movntdqa_2 =	"rmo:660F382ArM",
+  movntdqa_2 =	"rxo:660F382ArM",
   mpsadbw_3 =	"rmio:660F3A42rMU",
   mpsadbw_3 =	"rmio:660F3A42rMU",
   packusdw_2 =	"rmo:660F382BrM",
   packusdw_2 =	"rmo:660F382BrM",
   pblendvb_3 =	"rmRo:660F3810rM",
   pblendvb_3 =	"rmRo:660F3810rM",
@@ -1404,6 +1441,238 @@ local map_op = {
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntsd_2 =	"xr/qo:nF20F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   -- popcnt is also in SSE4.2
   -- popcnt is also in SSE4.2
+
+  -- AES-NI
+  aesdec_2 =	"rmo:660F38DErM",
+  aesdeclast_2 = "rmo:660F38DFrM",
+  aesenc_2 =	"rmo:660F38DCrM",
+  aesenclast_2 = "rmo:660F38DDrM",
+  aesimc_2 =	"rmo:660F38DBrM",
+  aeskeygenassist_3 = "rmio:660F3ADFrMU",
+  pclmulqdq_3 =	"rmio:660F3A44rMU",
+
+   -- AVX FP ops
+  vaddsubpd_3 =	"rrmoy:660FVD0rM",
+  vaddsubps_3 =	"rrmoy:F20FVD0rM",
+  vandpd_3 =	"rrmoy:660FV54rM",
+  vandps_3 =	"rrmoy:0FV54rM",
+  vandnpd_3 =	"rrmoy:660FV55rM",
+  vandnps_3 =	"rrmoy:0FV55rM",
+  vblendpd_4 =	"rrmioy:660F3AV0DrMU",
+  vblendps_4 =	"rrmioy:660F3AV0CrMU",
+  vblendvpd_4 =	"rrmroy:660F3AV4BrMs",
+  vblendvps_4 =	"rrmroy:660F3AV4ArMs",
+  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
+  vcmppd_4 =	"rrmioy:660FVC2rMU",
+  vcmpps_4 =	"rrmioy:0FVC2rMU",
+  vcmpsd_4 =	"rrrio:F20FVC2rMU|rrxi/ooq:",
+  vcmpss_4 =	"rrrio:F30FVC2rMU|rrxi/ood:",
+  vcomisd_2 =	"rro:660Fu2FrM|rx/oq:",
+  vcomiss_2 =	"rro:0Fu2FrM|rx/od:",
+  vcvtdq2pd_2 =	"rro:F30FuE6rM|rx/oq:|rm/yo:",
+  vcvtdq2ps_2 =	"rmoy:0Fu5BrM",
+  vcvtpd2dq_2 =	"rmoy:F20FuE6rM",
+  vcvtpd2ps_2 =	"rmoy:660Fu5ArM",
+  vcvtps2dq_2 =	"rmoy:660Fu5BrM",
+  vcvtps2pd_2 =	"rro:0Fu5ArM|rx/oq:|rm/yo:",
+  vcvtsd2si_2 =	"rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
+  vcvtsd2ss_3 =	"rrro:F20FV5ArM|rrx/ooq:",
+  vcvtsi2sd_3 =	"rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
+  vcvtsi2ss_3 =	"rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
+  vcvtss2sd_3 =	"rrro:F30FV5ArM|rrx/ood:",
+  vcvtss2si_2 =	"rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
+  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
+  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
+  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
+  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
+  vdppd_4 =	"rrmio:660F3AV41rMU",
+  vdpps_4 =	"rrmioy:660F3AV40rMU",
+  vextractf128_3 = "mri/oy:660F3AuL19RmU",
+  vextractps_3 = "mri/do:660F3Au17RmU",
+  vhaddpd_3 =	"rrmoy:660FV7CrM",
+  vhaddps_3 =	"rrmoy:F20FV7CrM",
+  vhsubpd_3 =	"rrmoy:660FV7DrM",
+  vhsubps_3 =	"rrmoy:F20FV7DrM",
+  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
+  vinsertps_4 =	"rrrio:660F3AV21rMU|rrxi/ood:",
+  vldmxcsr_1 =	"xd:0FuAE2m",
+  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
+  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
+  vmovapd_2 =	"rmoy:660Fu28rM|mroy:660Fu29Rm",
+  vmovaps_2 =	"rmoy:0Fu28rM|mroy:0Fu29Rm",
+  vmovd_2 =	"rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
+  vmovq_2 =	"rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
+  vmovddup_2 =	"rmy:F20Fu12rM|rro:|rx/oq:",
+  vmovhlps_3 =	"rrro:0FV12rM",
+  vmovhpd_2 =	"xr/qo:660Fu17Rm",
+  vmovhpd_3 =	"rrx/ooq:660FV16rM",
+  vmovhps_2 =	"xr/qo:0Fu17Rm",
+  vmovhps_3 =	"rrx/ooq:0FV16rM",
+  vmovlhps_3 =	"rrro:0FV16rM",
+  vmovlpd_2 =	"xr/qo:660Fu13Rm",
+  vmovlpd_3 =	"rrx/ooq:660FV12rM",
+  vmovlps_2 =	"xr/qo:0Fu13Rm",
+  vmovlps_3 =	"rrx/ooq:0FV12rM",
+  vmovmskpd_2 =	"rr/do:660Fu50rM|rr/dy:660FuL50rM",
+  vmovmskps_2 =	"rr/do:0Fu50rM|rr/dy:0FuL50rM",
+  vmovntpd_2 =	"xroy:660Fu2BRm",
+  vmovntps_2 =	"xroy:0Fu2BRm",
+  vmovsd_2 =	"rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
+  vmovsd_3 =	"rrro:F20FV10rM",
+  vmovshdup_2 =	"rmoy:F30Fu16rM",
+  vmovsldup_2 =	"rmoy:F30Fu12rM",
+  vmovss_2 =	"rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
+  vmovss_3 =	"rrro:F30FV10rM",
+  vmovupd_2 =	"rmoy:660Fu10rM|mroy:660Fu11Rm",
+  vmovups_2 =	"rmoy:0Fu10rM|mroy:0Fu11Rm",
+  vorpd_3 =	"rrmoy:660FV56rM",
+  vorps_3 =	"rrmoy:0FV56rM",
+  vpermilpd_3 =	"rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
+  vpermilps_3 =	"rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
+  vperm2f128_4 = "rrmiy:660F3AV06rMU",
+  vptestpd_2 =	"rmoy:660F38u0FrM",
+  vptestps_2 =	"rmoy:660F38u0ErM",
+  vrcpps_2 =	"rmoy:0Fu53rM",
+  vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
+  vrsqrtps_2 =	"rmoy:0Fu52rM",
+  vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
+  vroundpd_3 =	"rmioy:660F3AV09rMU",
+  vroundps_3 =	"rmioy:660F3AV08rMU",
+  vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
+  vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
+  vshufpd_4 =	"rrmioy:660FVC6rMU",
+  vshufps_4 =	"rrmioy:0FVC6rMU",
+  vsqrtps_2 =	"rmoy:0Fu51rM",
+  vsqrtss_2 =	"rro:F30Fu51rM|rx/od:",
+  vsqrtpd_2 =	"rmoy:660Fu51rM",
+  vsqrtsd_2 =	"rro:F20Fu51rM|rx/oq:",
+  vstmxcsr_1 =	"xd:0FuAE3m",
+  vucomisd_2 =	"rro:660Fu2ErM|rx/oq:",
+  vucomiss_2 =	"rro:0Fu2ErM|rx/od:",
+  vunpckhpd_3 =	"rrmoy:660FV15rM",
+  vunpckhps_3 =	"rrmoy:0FV15rM",
+  vunpcklpd_3 =	"rrmoy:660FV14rM",
+  vunpcklps_3 =	"rrmoy:0FV14rM",
+  vxorpd_3 =	"rrmoy:660FV57rM",
+  vxorps_3 =	"rrmoy:0FV57rM",
+  vzeroall_0 =	"0FuL77",
+  vzeroupper_0 = "0Fu77",
+
+  -- AVX2 FP ops
+  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
+  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
+  -- *vgather* (!vsib)
+  vpermpd_3 =	"rmiy:660F3AuX01rMU",
+  vpermps_3 =	"rrmy:660F38V16rM",
+
+  -- AVX, AVX2 integer ops
+  -- In general, xmm requires AVX, ymm requires AVX2.
+  vaesdec_3 =  "rrmo:660F38VDErM",
+  vaesdeclast_3 = "rrmo:660F38VDFrM",
+  vaesenc_3 =  "rrmo:660F38VDCrM",
+  vaesenclast_3 = "rrmo:660F38VDDrM",
+  vaesimc_2 =  "rmo:660F38uDBrM",
+  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
+  vlddqu_2 =	"rxoy:F20FuF0rM",
+  vmaskmovdqu_2 = "rro:660FuF7rM",
+  vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
+  vmovdqu_2 =	"rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
+  vmovntdq_2 =	"xroy:660FuE7Rm",
+  vmovntdqa_2 =	"rxoy:660F38u2ArM",
+  vmpsadbw_4 =	"rrmioy:660F3AV42rMU",
+  vpabsb_2 =	"rmoy:660F38u1CrM",
+  vpabsd_2 =	"rmoy:660F38u1ErM",
+  vpabsw_2 =	"rmoy:660F38u1DrM",
+  vpackusdw_3 =	"rrmoy:660F38V2BrM",
+  vpalignr_4 =	"rrmioy:660F3AV0FrMU",
+  vpblendvb_4 =	"rrmroy:660F3AV4CrMs",
+  vpblendw_4 =	"rrmioy:660F3AV0ErMU",
+  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
+  vpcmpeqq_3 =	"rrmoy:660F38V29rM",
+  vpcmpestri_3 = "rmio:660F3Au61rMU",
+  vpcmpestrm_3 = "rmio:660F3Au60rMU",
+  vpcmpgtq_3 =	"rrmoy:660F38V37rM",
+  vpcmpistri_3 = "rmio:660F3Au63rMU",
+  vpcmpistrm_3 = "rmio:660F3Au62rMU",
+  vpextrb_3 =	"rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
+  vpextrw_3 =	"rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
+  vpextrd_3 =	"mri/do:660F3Au16RmU",
+  vpextrq_3 =	"mri/qo:660F3Au16RmU",
+  vphaddw_3 =	"rrmoy:660F38V01rM",
+  vphaddd_3 =	"rrmoy:660F38V02rM",
+  vphaddsw_3 =	"rrmoy:660F38V03rM",
+  vphminposuw_2 = "rmo:660F38u41rM",
+  vphsubw_3 =	"rrmoy:660F38V05rM",
+  vphsubd_3 =	"rrmoy:660F38V06rM",
+  vphsubsw_3 =	"rrmoy:660F38V07rM",
+  vpinsrb_4 =	"rrri/ood:660F3AV20rMU|rrxi/oob:",
+  vpinsrw_4 =	"rrri/ood:660FVC4rMU|rrxi/oow:",
+  vpinsrd_4 =	"rrmi/ood:660F3AV22rMU",
+  vpinsrq_4 =	"rrmi/ooq:660F3AVX22rMU",
+  vpmaddubsw_3 = "rrmoy:660F38V04rM",
+  vpmaxsb_3 =	"rrmoy:660F38V3CrM",
+  vpmaxsd_3 =	"rrmoy:660F38V3DrM",
+  vpmaxuw_3 =	"rrmoy:660F38V3ErM",
+  vpmaxud_3 =	"rrmoy:660F38V3FrM",
+  vpminsb_3 =	"rrmoy:660F38V38rM",
+  vpminsd_3 =	"rrmoy:660F38V39rM",
+  vpminuw_3 =	"rrmoy:660F38V3ArM",
+  vpminud_3 =	"rrmoy:660F38V3BrM",
+  vpmovmskb_2 =	"rr/do:660FuD7rM|rr/dy:660FuLD7rM",
+  vpmovsxbw_2 =	"rroy:660F38u20rM|rx/oq:|rx/yo:",
+  vpmovsxbd_2 =	"rroy:660F38u21rM|rx/od:|rx/yq:",
+  vpmovsxbq_2 =	"rroy:660F38u22rM|rx/ow:|rx/yd:",
+  vpmovsxwd_2 =	"rroy:660F38u23rM|rx/oq:|rx/yo:",
+  vpmovsxwq_2 =	"rroy:660F38u24rM|rx/od:|rx/yq:",
+  vpmovsxdq_2 =	"rroy:660F38u25rM|rx/oq:|rx/yo:",
+  vpmovzxbw_2 =	"rroy:660F38u30rM|rx/oq:|rx/yo:",
+  vpmovzxbd_2 =	"rroy:660F38u31rM|rx/od:|rx/yq:",
+  vpmovzxbq_2 =	"rroy:660F38u32rM|rx/ow:|rx/yd:",
+  vpmovzxwd_2 =	"rroy:660F38u33rM|rx/oq:|rx/yo:",
+  vpmovzxwq_2 =	"rroy:660F38u34rM|rx/od:|rx/yq:",
+  vpmovzxdq_2 =	"rroy:660F38u35rM|rx/oq:|rx/yo:",
+  vpmuldq_3 =	"rrmoy:660F38V28rM",
+  vpmulhrsw_3 =	"rrmoy:660F38V0BrM",
+  vpmulld_3 =	"rrmoy:660F38V40rM",
+  vpshufb_3 =	"rrmoy:660F38V00rM",
+  vpshufd_3 =	"rmioy:660Fu70rMU",
+  vpshufhw_3 =	"rmioy:F30Fu70rMU",
+  vpshuflw_3 =	"rmioy:F20Fu70rMU",
+  vpsignb_3 =	"rrmoy:660F38V08rM",
+  vpsignw_3 =	"rrmoy:660F38V09rM",
+  vpsignd_3 =	"rrmoy:660F38V0ArM",
+  vpslldq_3 =	"rrioy:660Fv737mU",
+  vpsllw_3 =	"rrmoy:660FVF1rM|rrioy:660Fv716mU",
+  vpslld_3 =	"rrmoy:660FVF2rM|rrioy:660Fv726mU",
+  vpsllq_3 =	"rrmoy:660FVF3rM|rrioy:660Fv736mU",
+  vpsraw_3 =	"rrmoy:660FVE1rM|rrioy:660Fv714mU",
+  vpsrad_3 =	"rrmoy:660FVE2rM|rrioy:660Fv724mU",
+  vpsrldq_3 =	"rrioy:660Fv733mU",
+  vpsrlw_3 =	"rrmoy:660FVD1rM|rrioy:660Fv712mU",
+  vpsrld_3 =	"rrmoy:660FVD2rM|rrioy:660Fv722mU",
+  vpsrlq_3 =	"rrmoy:660FVD3rM|rrioy:660Fv732mU",
+  vptest_2 =	"rmoy:660F38u17rM",
+
+  -- AVX2 integer ops
+  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
+  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
+  vextracti128_3 = "mri/oy:660F3AuL39RmU",
+  vpblendd_4 =	"rrmioy:660F3AV02rMU",
+  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
+  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
+  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
+  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
+  vpermd_3 =	"rrmy:660F38V36rM",
+  vpermq_3 =	"rmiy:660F3AuX00rMU",
+  -- *vpgather* (!vsib)
+  vperm2i128_4 = "rrmiy:660F3AV46rMU",
+  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
+  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
+  vpsllvd_3 =	"rrmoy:660F38V47rM",
+  vpsllvq_3 =	"rrmoy:660F38VX47rM",
+  vpsravd_3 =	"rrmoy:660F38V46rM",
+  vpsrlvd_3 =	"rrmoy:660F38V45rM",
+  vpsrlvq_3 =	"rrmoy:660F38VX45rM",
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -1454,28 +1723,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
   map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
 end
 end
 
 
--- SSE FP arithmetic ops.
+-- SSE / AVX FP arithmetic ops.
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
 		     sub = 12, min = 13, div = 14, max = 15 } do
 		     sub = 12, min = 13, div = 14, max = 15 } do
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
   map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+  if n ~= 1 then
+    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
+    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
+    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
+    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
+  end
+end
+
+-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
+for name,n in pairs{
+  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
+  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
+  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
+  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
+  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
+  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
+  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
+  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
+  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
+  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
+  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
+  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
+  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
+  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
+  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
+} do
+  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
+  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
 end
 end
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
+local map_vexarg = { u = false, v = 1, V = 2 }
+
 -- Process pattern string.
 -- Process pattern string.
 local function dopattern(pat, args, sz, op, needrex)
 local function dopattern(pat, args, sz, op, needrex)
-  local digit, addin
+  local digit, addin, vex
   local opcode = 0
   local opcode = 0
   local szov = sz
   local szov = sz
   local narg = 1
   local narg = 1
   local rex = 0
   local rex = 0
 
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- Limit number of section buffer positions used by a single dasm_put().
-  -- A single opcode needs a maximum of 5 positions.
-  if secpos+5 > maxsecpos then wflush() end
+  -- A single opcode needs a maximum of 6 positions.
+  if secpos+6 > maxsecpos then wflush() end
 
 
   -- Process each character.
   -- Process each character.
   for c in gmatch(pat.."|", ".") do
   for c in gmatch(pat.."|", ".") do
@@ -1489,6 +1788,8 @@ local function dopattern(pat, args, sz, op, needrex)
       szov = nil
       szov = nil
     elseif c == "X" then	-- Force REX.W.
     elseif c == "X" then	-- Force REX.W.
       rex = 8
       rex = 8
+    elseif c == "L" then	-- Force VEX.L.
+      vex.l = true
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       addin = args[1]; opcode = opcode + (addin.reg % 8)
       if narg < 2 then narg = 2 end
       if narg < 2 then narg = 2 end
@@ -1512,21 +1813,42 @@ local function dopattern(pat, args, sz, op, needrex)
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if t.xreg and t.xreg > 7 then rex = rex + 2 end
       if s > 7 then rex = rex + 4 end
       if s > 7 then rex = rex + 4 end
       if needrex then rex = rex + 16 end
       if needrex then rex = rex + 16 end
-      wputop(szov, opcode, rex); opcode = nil
+      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
+      opcode = nil
       local imark = sub(pat, -1) -- Force a mark (ugly).
       local imark = sub(pat, -1) -- Force a mark (ugly).
       -- Put ModRM/SIB with regno/last digit as spare.
       -- Put ModRM/SIB with regno/last digit as spare.
-      wputmrmsib(t, imark, s, addin and addin.vreg)
+      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
       addin = nil
       addin = nil
+    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
+      local b = band(opcode, 255); opcode = shr(opcode, 8)
+      local m = 1
+      if b == 0x38 then m = 2
+      elseif b == 0x3a then m = 3 end
+      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
+      if b ~= 0x0f then
+	werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
+	  "' in pattern `"..pat.."' for `"..op.."'")
+      end
+      local v = map_vexarg[c]
+      if v then v = remove(args, v) end
+      b = band(opcode, 255)
+      local p = 0
+      if b == 0x66 then p = 1
+      elseif b == 0xf3 then p = 2
+      elseif b == 0xf2 then p = 3 end
+      if p ~= 0 then opcode = shr(opcode, 8) end
+      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
+      vex = { m = m, p = p, v = v }
     else
     else
       if opcode then -- Flush opcode.
       if opcode then -- Flush opcode.
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if szov == "q" and rex == 0 then rex = rex + 8 end
 	if needrex then rex = rex + 16 end
 	if needrex then rex = rex + 16 end
 	if addin and addin.reg == -1 then
 	if addin and addin.reg == -1 then
-	  wputop(szov, opcode - 7, rex)
-	  waction("VREG", addin.vreg); wputxb(0)
+	  local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
+	  wvreg("opcode", addin.vreg, psz, sk)
 	else
 	else
 	  if addin and addin.reg > 7 then rex = rex + 1 end
 	  if addin and addin.reg > 7 then rex = rex + 1 end
-	  wputop(szov, opcode, rex)
+	  wputop(szov, opcode, rex, vex)
 	end
 	end
 	opcode = nil
 	opcode = nil
       end
       end
@@ -1563,6 +1885,14 @@ local function dopattern(pat, args, sz, op, needrex)
 	  else
 	  else
 	    wputlabel("REL_", imm, 2)
 	    wputlabel("REL_", imm, 2)
 	  end
 	  end
+	elseif c == "s" then
+	  local reg = a.reg
+	  if reg < 0 then
+	    wputb(0)
+	    wvreg("imm.hi", a.vreg)
+	  else
+	    wputb(shl(reg, 4))
+	  end
 	else
 	else
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
 	end
 	end
@@ -1639,11 +1969,14 @@ map_op[".template__"] = function(params, template, nparams)
     if pat == "" then pat = lastpat else lastpat = pat end
     if pat == "" then pat = lastpat else lastpat = pat end
     if matchtm(tm, args) then
     if matchtm(tm, args) then
       local prefix = sub(szm, 1, 1)
       local prefix = sub(szm, 1, 1)
-      if prefix == "/" then -- Match both operand sizes.
-	if args[1].opsize == sub(szm, 2, 2) and
-	   args[2].opsize == sub(szm, 3, 3) then
-	  dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
-	  return
+      if prefix == "/" then -- Exactly match leading operand sizes.
+	for i = #szm,1,-1 do
+	  if i == 1 then
+	    dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+	    return
+	  elseif args[i-1].opsize ~= sub(szm, i, i) then
+	    break
+	  end
 	end
 	end
       else -- Match common operand size.
       else -- Match common operand size.
 	local szp = sz
 	local szp = sz
@@ -1708,8 +2041,8 @@ if x64 then
 	rex = a.reg > 7 and 9 or 8
 	rex = a.reg > 7 and 9 or 8
       end
       end
     end
     end
-    wputop(sz, opcode, rex)
-    if vreg then waction("VREG", vreg); wputxb(0) end
+    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
+    wvreg("opcode", vreg, psz, sk)
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)(%s)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
     waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
   end
   end

+ 5 - 5
Source/ThirdParty/LuaJIT/dynasm/dynasm.lua

@@ -2,7 +2,7 @@
 -- DynASM. A dynamic assembler for code generation engines.
 -- DynASM. A dynamic assembler for code generation engines.
 -- Originally designed and implemented for LuaJIT.
 -- Originally designed and implemented for LuaJIT.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See below for full copyright notice.
 -- See below for full copyright notice.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
@@ -10,14 +10,14 @@
 local _info = {
 local _info = {
   name =	"DynASM",
   name =	"DynASM",
   description =	"A dynamic assembler for code generation engines",
   description =	"A dynamic assembler for code generation engines",
-  version =	"1.3.0",
-  vernum =	 10300,
-  release =	"2011-05-05",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
   author =	"Mike Pall",
   author =	"Mike Pall",
   url =		"http://luajit.org/dynasm.html",
   url =		"http://luajit.org/dynasm.html",
   license =	"MIT",
   license =	"MIT",
   copyright =	[[
   copyright =	[[
-Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 
 
 Permission is hereby granted, free of charge, to any person obtaining
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 a copy of this software and associated documentation files (the

+ 14 - 12
Source/ThirdParty/LuaJIT/src/host/buildvm.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder.
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** This is a tool to build the hand-tuned assembler code required for
 ** This is a tool to build the hand-tuned assembler code required for
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
 #include "../dynasm/dasm_x86.h"
 #include "../dynasm/dasm_x86.h"
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 #include "../dynasm/dasm_arm.h"
 #include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #include "../dynasm/dasm_ppc.h"
 #include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #include "../dynasm/dasm_mips.h"
 #include "../dynasm/dasm_mips.h"
 #else
 #else
@@ -113,8 +113,8 @@ static const char *sym_decorate(BuildCtx *ctx,
       name[0] = '@';
       name[0] = '@';
     else
     else
       *p = '\0';
       *p = '\0';
-#elif (LJ_TARGET_PPC  || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
-    /* Keep @plt. */
+#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
+    /* Keep @plt etc. */
 #else
 #else
     *p = '\0';
     *p = '\0';
 #endif
 #endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
   ctx->nreloc = 0;
   ctx->nreloc = 0;
 
 
   ctx->globnames = globnames;
   ctx->globnames = globnames;
+  ctx->extnames = extnames;
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->nrelocsym = 0;
   ctx->nrelocsym = 0;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
   char buf[80];
   char buf[80];
   int i;
   int i;
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
-  fprintf(ctx->fp, "module(...)\n\n");
+  fprintf(ctx->fp, "return {\n\n");
 
 
   fprintf(ctx->fp, "bcnames = \"");
   fprintf(ctx->fp, "bcnames = \"");
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
 
   fprintf(ctx->fp, "irnames = \"");
   fprintf(ctx->fp, "irnames = \"");
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
 
   fprintf(ctx->fp, "irfpm = { [0]=");
   fprintf(ctx->fp, "irfpm = { [0]=");
   for (i = 0; irfpm_names[i]; i++)
   for (i = 0; irfpm_names[i]; i++)
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "irfield = { [0]=");
   fprintf(ctx->fp, "irfield = { [0]=");
   for (i = 0; irfield_names[i]; i++) {
   for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
     if (p) *p = '.';
     if (p) *p = '.';
     fprintf(ctx->fp, "\"%s\", ", buf);
     fprintf(ctx->fp, "\"%s\", ", buf);
   }
   }
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "ircall = {\n[0]=");
   fprintf(ctx->fp, "ircall = {\n[0]=");
   for (i = 0; ircall_names[i]; i++)
   for (i = 0; ircall_names[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
 
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   for (i = 0; trace_errors[i]; i++)
   for (i = 0; trace_errors[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 }
 }
 
 
 /* -- Argument parsing ---------------------------------------------------- */
 /* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +491,7 @@ int main(int argc, char **argv)
   case BUILD_vmdef:
   case BUILD_vmdef:
     emit_vmdef(ctx);
     emit_vmdef(ctx);
     emit_lib(ctx);
     emit_lib(ctx);
+    fprintf(ctx->fp, "}\n\n");
     break;
     break;
   case BUILD_ffdef:
   case BUILD_ffdef:
   case BUILD_libdef:
   case BUILD_libdef:

+ 2 - 1
Source/ThirdParty/LuaJIT/src/host/buildvm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder.
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _BUILDVM_H
 #ifndef _BUILDVM_H
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
   const char *beginsym;
   const char *beginsym;
   /* Strings generated by DynASM. */
   /* Strings generated by DynASM. */
   const char *const *globnames;
   const char *const *globnames;
+  const char *const *extnames;
   const char *dasm_ident;
   const char *dasm_ident;
   const char *dasm_arch;
   const char *dasm_arch;
   /* Relocations. */
   /* Relocations. */

+ 52 - 11
Source/ThirdParty/LuaJIT/src/host/buildvm_asm.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder: Assembler source code emitter.
 ** LuaJIT VM builder: Assembler source code emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "buildvm.h"
 #include "buildvm.h"
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
 };
 };
 
 
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
 				const char *sym)
 				const char *sym)
 {
 {
   const char *opname = NULL;
   const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
     exit(1);
     exit(1);
   }
   }
   emit_asm_bytes(ctx, cp, n);
   emit_asm_bytes(ctx, cp, n);
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    if (ctx->mode == BUILD_elfasm) {
+      if (LJ_32)
+	fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+      fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+      if (LJ_32)
+	fprintf(ctx->fp, "#endif\n");
+      return;
+    } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+      fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+      return;
+    }
+  }
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
 }
 }
 #else
 #else
@@ -107,7 +121,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
 	    ins, sym);
 	    ins, sym);
     exit(1);
     exit(1);
   }
   }
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+  if ((ins >> 26) == 0x25u) {
+    fprintf(ctx->fp, "\tbl %s\n", sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_PPC
 #if LJ_TARGET_PS3
 #if LJ_TARGET_PS3
 #define TOCPREFIX "."
 #define TOCPREFIX "."
 #else
 #else
@@ -117,6 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
   } else if ((ins >> 26) == 18) {
   } else if ((ins >> 26) == 18) {
+#if LJ_ARCH_PPC64
+    const char *suffix = strchr(sym, '@');
+    if (suffix && suffix[1] == 'h') {
+      fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+    } else if (suffix && suffix[1] == 'l') {
+      fprintf(ctx->fp, "\tld 12, %s\n", sym);
+    } else
+#endif
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
   } else {
   } else {
     fprintf(stderr,
     fprintf(stderr,
@@ -214,6 +245,9 @@ void emit_asm(BuildCtx *ctx)
   int i, rel;
   int i, rel;
 
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC64
+  fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
   fprintf(ctx->fp, "\t.text\n");
   fprintf(ctx->fp, "\t.text\n");
   emit_asm_align(ctx, 4);
   emit_asm_align(ctx, 4);
 
 
@@ -227,11 +261,20 @@ void emit_asm(BuildCtx *ctx)
 
 
 #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
 #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
   /* This should really be moved into buildvm_arm.dasc. */
   /* This should really be moved into buildvm_arm.dasc. */
+#if LJ_ARCH_HASFPU
+  fprintf(ctx->fp,
+	  ".fnstart\n"
+	  ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
+	  ".vsave {d8-d15}\n"
+	  ".save {r4}\n"
+	  ".pad #28\n");
+#else
   fprintf(ctx->fp,
   fprintf(ctx->fp,
 	  ".fnstart\n"
 	  ".fnstart\n"
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".pad #28\n");
 	  ".pad #28\n");
 #endif
 #endif
+#endif
 #if LJ_TARGET_MIPS
 #if LJ_TARGET_MIPS
   fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
   fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
 #endif
 #endif
@@ -254,8 +297,9 @@ void emit_asm(BuildCtx *ctx)
       BuildReloc *r = &ctx->reloc[rel];
       BuildReloc *r = &ctx->reloc[rel];
       int n = r->ofs - ofs;
       int n = r->ofs - ofs;
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
-      if (ctx->mode == BUILD_machasm && r->type != 0) {
-	emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      if (r->type != 0 &&
+	  (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+	emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
       } else {
       } else {
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -286,13 +330,10 @@ void emit_asm(BuildCtx *ctx)
   fprintf(ctx->fp, "\n");
   fprintf(ctx->fp, "\n");
   switch (ctx->mode) {
   switch (ctx->mode) {
   case BUILD_elfasm:
   case BUILD_elfasm:
-#if !LJ_TARGET_PS3
+#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
 #endif
 #endif
-#if LJ_TARGET_PPCSPE
-    /* Soft-float ABI + SPE. */
-    fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3
     /* Hard-float ABI. */
     /* Hard-float ABI. */
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
 #endif
 #endif

+ 1 - 1
Source/ThirdParty/LuaJIT/src/host/buildvm_fold.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder: IR folding hash table generator.
 ** LuaJIT VM builder: IR folding hash table generator.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "buildvm.h"
 #include "buildvm.h"

+ 61 - 2
Source/ThirdParty/LuaJIT/src/host/buildvm_lib.c

@@ -1,11 +1,13 @@
 /*
 /*
 ** LuaJIT VM builder: library definition compiler.
 ** LuaJIT VM builder: library definition compiler.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "buildvm.h"
 #include "buildvm.h"
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_bc.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
+#include "buildvm_libbc.h"
 
 
 /* Context for library definitions. */
 /* Context for library definitions. */
 static uint8_t obuf[8192];
 static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
   regfunc = REGFUNC_OK;
   regfunc = REGFUNC_OK;
 }
 }
 
 
+static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
+{
+  uint32_t v = *p++;
+  if (v >= 0x80) {
+    int sh = 0; v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+  }
+  *vv = v;
+  return p;
+}
+
+static void libdef_fixupbc(uint8_t *p)
+{
+  uint32_t i, sizebc;
+  p += 4;
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  for (i = 0; i < sizebc; i++, p += 4) {
+    uint8_t op = p[libbc_endian ? 3 : 0];
+    uint8_t ra = p[libbc_endian ? 2 : 1];
+    uint8_t rc = p[libbc_endian ? 1 : 2];
+    uint8_t rb = p[libbc_endian ? 0 : 3];
+    if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
+      op = BC_ISNUM; rc++;
+    }
+    p[LJ_ENDIAN_SELECT(0, 3)] = op;
+    p[LJ_ENDIAN_SELECT(1, 2)] = ra;
+    p[LJ_ENDIAN_SELECT(2, 1)] = rc;
+    p[LJ_ENDIAN_SELECT(3, 0)] = rb;
+  }
+}
+
+static void libdef_lua(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    int i;
+    for (i = 0; libbc_map[i].name != NULL; i++) {
+      if (!strcmp(libbc_map[i].name, p)) {
+	int ofs = libbc_map[i].ofs;
+	int len = libbc_map[i+1].ofs - ofs;
+	obuf[2]++;  /* Bump hash table size. */
+	*optr++ = LIBINIT_LUA;
+	libdef_name(p, 0);
+	memcpy(optr, libbc_code + ofs, len);
+	libdef_fixupbc(optr);
+	optr += len;
+	return;
+      }
+    }
+    fprintf(stderr, "Error: missing libbc definition for %s\n", p);
+    exit(1);
+  }
+}
+
 static uint32_t find_rec(char *name)
 static uint32_t find_rec(char *name)
 {
 {
   char *p = (char *)obuf;
   char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
+  { "LUA(",	")",		libdef_lua,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "SET(",	")",		libdef_set,		0 },
   { "SET(",	")",		libdef_set,		0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       ffasmfunc);
       ffasmfunc);
   } else if (ctx->mode == BUILD_vmdef) {
   } else if (ctx->mode == BUILD_vmdef) {
-    fprintf(ctx->fp, "}\n\n");
+    fprintf(ctx->fp, "},\n\n");
   } else if (ctx->mode == BUILD_bcdef) {
   } else if (ctx->mode == BUILD_bcdef) {
     int i;
     int i;
     fprintf(ctx->fp, "\n};\n\n");
     fprintf(ctx->fp, "\n};\n\n");

+ 45 - 0
Source/ThirdParty/LuaJIT/src/host/buildvm_libbc.h

@@ -0,0 +1,45 @@
+/* This is a generated file. DO NOT EDIT! */
+
+static const int libbc_endian = 0;
+
+static const uint8_t libbc_code[] = {
+#if LJ_FR2
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#else
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
+0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#endif
+};
+
+static const struct { const char *name; int ofs; } libbc_map[] = {
+{"math_deg",0},
+{"math_rad",25},
+{"string_len",50},
+{"table_foreachi",69},
+{"table_foreach",136},
+{"table_getn",207},
+{"table_remove",226},
+{NULL,355}
+};
+

+ 1 - 1
Source/ThirdParty/LuaJIT/src/host/buildvm_peobj.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT VM builder: PE object emitter.
 ** LuaJIT VM builder: PE object emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Only used for building on Windows, since we cannot assume the presence
 ** Only used for building on Windows, since we cannot assume the presence
 ** of a suitable assembler. The host and target byte order must match.
 ** of a suitable assembler. The host and target byte order must match.

+ 197 - 0
Source/ThirdParty/LuaJIT/src/host/genlibbc.lua

@@ -0,0 +1,197 @@
+----------------------------------------------------------------------------
+-- Lua script to dump the bytecode of the library functions written in Lua.
+-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local ffi = require("ffi")
+local bit = require("bit")
+local vmdef = require("jit.vmdef")
+local bcnames = vmdef.bcnames
+
+local format = string.format
+
+local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
+
+local function usage(arg)
+  io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
+		  " [-o buildvm_libbc.h] lib_*.c\n")
+  os.exit(1)
+end
+
+local function parse_arg(arg)
+  local outfile = "-"
+  if not (arg and arg[1]) then
+    usage(arg)
+  end
+  if arg[1] == "-o" then
+    outfile = arg[2]
+    if not outfile then usage(arg) end
+    table.remove(arg, 1)
+    table.remove(arg, 1)
+  end
+  return outfile
+end
+
+local function read_files(names)
+  local src = ""
+  for _,name in ipairs(names) do
+    local fp = assert(io.open(name))
+    src = src .. fp:read("*a")
+    fp:close()
+  end
+  return src
+end
+
+local function transform_lua(code)
+  local fixup = {}
+  local n = -30000
+  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
+    n = n + 1
+    fixup[n] = { "CHECK", tp }
+    return format("%s=%d", var, n)
+  end)
+  code = string.gsub(code, "PAIRS%((.-)%)", function(var)
+    fixup.PAIRS = true
+    return format("nil, %s, 0", var)
+  end)
+  return "return "..code, fixup
+end
+
+local function read_uleb128(p)
+  local v = p[0]; p = p + 1
+  if v >= 128 then
+    local sh = 7; v = v - 128
+    repeat
+      local r = p[0]
+      v = v + bit.lshift(bit.band(r, 127), sh)
+      sh = sh + 7
+      p = p + 1
+    until r < 128
+  end
+  return p, v
+end
+
+-- ORDER LJ_T
+local name2itype = {
+  str = 5, func = 9, tab = 12, int = 14, num = 15
+}
+
+local BC = {}
+for i=0,#bcnames/6-1 do
+  BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
+end
+local xop, xra = isbe and 3 or 0, isbe and 2 or 1
+local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
+
+local function fixup_dump(dump, fixup)
+  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
+  local p = buf+5
+  local n, sizebc
+  p, n = read_uleb128(p)
+  local start = p
+  p = p + 4
+  p = read_uleb128(p)
+  p = read_uleb128(p)
+  p, sizebc = read_uleb128(p)
+  local rawtab = {}
+  for i=0,sizebc-1 do
+    local op = p[xop]
+    if op == BC.KSHORT then
+      local rd = p[xrc] + 256*p[xrb]
+      rd = bit.arshift(bit.lshift(rd, 16), 16)
+      local f = fixup[rd]
+      if f then
+	if f[1] == "CHECK" then
+	  local tp = f[2]
+	  if tp == "tab" then rawtab[p[xra]] = true end
+	  p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
+	  p[xrb] = 0
+	  p[xrc] = name2itype[tp]
+	else
+	  error("unhandled fixup type: "..f[1])
+	end
+      end
+    elseif op == BC.TGETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TGETR
+      end
+    elseif op == BC.TSETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TSETR
+      end
+    elseif op == BC.ITERC then
+      if fixup.PAIRS then
+	p[xop] = BC.ITERN
+      end
+    end
+    p = p + 4
+  end
+  return ffi.string(start, n)
+end
+
+local function find_defs(src)
+  local defs = {}
+  for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
+    local env = {}
+    local tcode, fixup = transform_lua(code)
+    local func = assert(load(tcode, "", nil, env))()
+    defs[name] = fixup_dump(string.dump(func, true), fixup)
+    defs[#defs+1] = name
+  end
+  return defs
+end
+
+local function gen_header(defs)
+  local t = {}
+  local function w(x) t[#t+1] = x end
+  w("/* This is a generated file. DO NOT EDIT! */\n\n")
+  w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
+  local s = ""
+  for _,name in ipairs(defs) do
+    s = s .. defs[name]
+  end
+  w("static const uint8_t libbc_code[] = {\n")
+  local n = 0
+  for i=1,#s do
+    local x = string.byte(s, i)
+    w(x); w(",")
+    n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
+    if n >= 75 then n = 0; w("\n") end
+  end
+  w("0\n};\n\n")
+  w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
+  local m = 0
+  for _,name in ipairs(defs) do
+    w('{"'); w(name); w('",'); w(m) w('},\n')
+    m = m + #defs[name]
+  end
+  w("{NULL,"); w(m); w("}\n};\n\n")
+  return table.concat(t)
+end
+
+local function write_file(name, data)
+  if name == "-" then
+    assert(io.write(data))
+    assert(io.flush())
+  else
+    local fp = io.open(name)
+    if fp then
+      local old = fp:read("*a")
+      fp:close()
+      if data == old then return end
+    end
+    fp = assert(io.open(name, "w"))
+    assert(fp:write(data))
+    assert(fp:close())
+  end
+end
+
+local outfile = parse_arg(arg)
+local src = read_files(arg)
+local defs = find_defs(src)
+local hdr = gen_header(defs)
+write_file(outfile, hdr)
+

+ 1 - 1
Source/ThirdParty/LuaJIT/src/host/genminilua.lua

@@ -2,7 +2,7 @@
 -- Lua script to generate a customized, minified version of Lua.
 -- Lua script to generate a customized, minified version of Lua.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 
 

+ 10 - 11
Source/ThirdParty/LuaJIT/src/jit/bc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT bytecode listing module.
 -- LuaJIT bytecode listing module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -41,7 +41,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20003, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local bit = require("bit")
 local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-line = bcline
-dump = bcdump
-targets = bctargets
-
-on = bcliston
-off = bclistoff
-start = bcliston -- For -j command line option.
+return {
+  line = bcline,
+  dump = bcdump,
+  targets = bctargets,
+  on = bcliston,
+  off = bclistoff,
+  start = bcliston -- For -j command line option.
+}
 
 

+ 16 - 14
Source/ThirdParty/LuaJIT/src/jit/bcsave.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT module to save/list bytecode.
 -- LuaJIT module to save/list bytecode.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -11,7 +11,7 @@
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20003, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local bit = require("bit")
 local bit = require("bit")
 
 
 -- Symbol name prefix for LuaJIT bytecode.
 -- Symbol name prefix for LuaJIT bytecode.
@@ -63,13 +63,13 @@ local map_type = {
 }
 }
 
 
 local map_arch = {
 local map_arch = {
-  x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
+  x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
   mips = true, mipsel = true,
   mips = true, mipsel = true,
 }
 }
 
 
 local map_os = {
 local map_os = {
   linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
   linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
-  openbsd = true, solaris = true,
+  openbsd = true, dragonfly = true, solaris = true,
 }
 }
 
 
 local function checkarg(str, map, err)
 local function checkarg(str, map, err)
@@ -200,9 +200,9 @@ typedef struct {
 ]]
 ]]
   local symname = LJBC_PREFIX..ctx.modname
   local symname = LJBC_PREFIX..ctx.modname
   local is64, isbe = false, false
   local is64, isbe = false, false
-  if ctx.arch == "x64" then
+  if ctx.arch == "x64" or ctx.arch == "arm64" then
     is64 = true
     is64 = true
-  elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+  elseif ctx.arch == "ppc" or ctx.arch == "mips" then
     isbe = true
     isbe = true
   end
   end
 
 
@@ -237,7 +237,7 @@ typedef struct {
   hdr.eendian = isbe and 2 or 1
   hdr.eendian = isbe and 2 or 1
   hdr.eversion = 1
   hdr.eversion = 1
   hdr.type = f16(1)
   hdr.type = f16(1)
-  hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
     hdr.flags = 0x50001006
     hdr.flags = 0x50001006
   end
   end
@@ -477,13 +477,13 @@ typedef struct {
 } mach_obj_64;
 } mach_obj_64;
 typedef struct {
 typedef struct {
   mach_fat_header fat;
   mach_fat_header fat;
-  mach_fat_arch fat_arch[4];
+  mach_fat_arch fat_arch[2];
   struct {
   struct {
     mach_header hdr;
     mach_header hdr;
     mach_segment_command seg;
     mach_segment_command seg;
     mach_section sec;
     mach_section sec;
     mach_symtab_command sym;
     mach_symtab_command sym;
-  } arch[4];
+  } arch[2];
   mach_nlist sym_entry;
   mach_nlist sym_entry;
   uint8_t space[4096];
   uint8_t space[4096];
 } mach_fat_obj;
 } mach_fat_obj;
@@ -494,6 +494,8 @@ typedef struct {
     is64, align, mobj = true, 8, "mach_obj_64"
     is64, align, mobj = true, 8, "mach_obj_64"
   elseif ctx.arch == "arm" then
   elseif ctx.arch == "arm" then
     isfat, mobj = true, "mach_fat_obj"
     isfat, mobj = true, "mach_fat_obj"
+  elseif ctx.arch == "arm64" then
+    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
   else
   else
     check(ctx.arch == "x86", "unsupported architecture for OSX")
     check(ctx.arch == "x86", "unsupported architecture for OSX")
   end
   end
@@ -503,8 +505,8 @@ typedef struct {
   -- Create Mach-O object and fill in header.
   -- Create Mach-O object and fill in header.
   local o = ffi.new(mobj)
   local o = ffi.new(mobj)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
-  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
-  local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
+  local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
   if isfat then
   if isfat then
     o.fat.magic = be32(0xcafebabe)
     o.fat.magic = be32(0xcafebabe)
     o.fat.nfat_arch = be32(#cpusubtype)
     o.fat.nfat_arch = be32(#cpusubtype)
@@ -653,7 +655,7 @@ end
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-start = docmd -- Process -b command line option.
+return {
+  start = docmd -- Process -b command line option.
+}
 
 

+ 10 - 10
Source/ThirdParty/LuaJIT/src/jit/dis_arm.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT ARM disassembler module.
 -- LuaJIT ARM disassembler module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 16 then return map_gpr[r] end
   if r < 16 then return map_gpr[r] end
   return "d"..(r-16)
   return "d"..(r-16)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
 

+ 16 - 16
Source/ThirdParty/LuaJIT/src/jit/dis_mips.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS disassembler module.
 -- LuaJIT MIPS disassembler module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
   return ctx
   return ctx
 end
 end
 
 
-local function create_el_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create_el(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.get = get_le
   ctx.get = get_le
   return ctx
   return ctx
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
-local function disass_el_(code, addr, out)
-  create_el_(code, addr, out):disass()
+local function disass_el(code, addr, out)
+  create_el(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
   return "f"..(r-32)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-create_el = create_el_
-disass = disass_
-disass_el = disass_el_
-regname = regname_
+return {
+  create = create,
+  create_el = create_el,
+  disass = disass,
+  disass_el = disass_el,
+  regname = regname
+}
 
 

+ 7 - 10
Source/ThirdParty/LuaJIT/src/jit/dis_mipsel.lua

@@ -1,20 +1,17 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT MIPSEL disassembler wrapper module.
 -- LuaJIT MIPSEL disassembler wrapper module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This module just exports the little-endian functions from the
 -- This module just exports the little-endian functions from the
 -- MIPS disassembler module. All the interesting stuff is there.
 -- MIPS disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
-local require = require
-
-module(...)
-
-local dis_mips = require(_PACKAGE.."dis_mips")
-
-create = dis_mips.create_el
-disass = dis_mips.disass_el
-regname = dis_mips.regname
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create_el,
+  disass = dis_mips.disass_el,
+  regname = dis_mips.regname
+}
 
 

+ 10 - 10
Source/ThirdParty/LuaJIT/src/jit/dis_ppc.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT PPC disassembler module.
 -- LuaJIT PPC disassembler module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = addr or 0
   ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
   return "f"..(r-32)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
 

+ 7 - 10
Source/ThirdParty/LuaJIT/src/jit/dis_x64.lua

@@ -1,20 +1,17 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT x64 disassembler wrapper module.
 -- LuaJIT x64 disassembler wrapper module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This module just exports the 64 bit functions from the combined
 -- This module just exports the 64 bit functions from the combined
 -- x86/x64 disassembler module. All the interesting stuff is there.
 -- x86/x64 disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
-local require = require
-
-module(...)
-
-local dis_x86 = require(_PACKAGE.."dis_x86")
-
-create = dis_x86.create64
-disass = dis_x86.disass64
-regname = dis_x86.regname64
+local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
+return {
+  create = dis_x86.create64,
+  disass = dis_x86.disass64,
+  regname = dis_x86.regname64
+}
 
 

+ 183 - 91
Source/ThirdParty/LuaJIT/src/jit/dis_x86.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT x86/x64 disassembler module.
 -- LuaJIT x86/x64 disassembler module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
 -- This is a helper module used by the LuaJIT machine code dumper module.
@@ -15,19 +15,20 @@
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- Intel and AMD manuals. The supported instruction set is quite extensive
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- and reflects what a current generation Intel or AMD CPU implements in
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
--- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
--- instructions.
+-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
+-- (VMX/SVM) instructions.
 --
 --
 -- Notes:
 -- Notes:
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
 -- * No attempt at optimization has been made -- it's fast enough for my needs.
--- * The public API may change when more architectures are added.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 
 
 local type = type
 local type = type
 local sub, byte, format = string.sub, string.byte, string.format
 local sub, byte, format = string.sub, string.byte, string.format
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local lower, rep = string.lower, string.rep
 local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local tohex = bit.tohex
 
 
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 local map_opc1_32 = {
 local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
 --Cx
 --Cx
-"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
 --Dx
 --Dx
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
   [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
-  [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+  [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
   [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
 }, { __index = map_opc1_32 })
 }, { __index = map_opc1_32 })
 
 
@@ -112,12 +113,12 @@ local map_opc2 = {
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
 --1x
 --1x
-"movupsXrm|movssXrm|movupdXrm|movsdXrm",
-"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
+"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
 "movlpsXmr||movlpdXmr",
 "movlpsXmr||movlpdXmr",
-"unpcklpsXrm||unpcklpdXrm",
-"unpckhpsXrm||unpckhpdXrm",
+"unpcklpsXrvm||unpcklpdXrvm",
+"unpckhpsXrvm||unpckhpdXrvm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
 "movhpsXmr||movhpdXmr",
 "movhpsXmr||movhpdXmr",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
 "movapsXrm||movapdXrm",
 "movapsXrm||movapdXrm",
 "movapsXmr||movapdXmr",
 "movapsXmr||movapdXmr",
-"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
 --5x
 --5x
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
-"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
-"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
-"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
-"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
-"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
+"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
+"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
+"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
+"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
-"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
-"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
+"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
 --6x
 --6x
-"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
-"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
-"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
-"||punpcklqdqXrm","||punpckhqdqXrm",
+"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
+"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
+"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
+"||punpcklqdqXrvm","||punpckhqdqXrvm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
 --7x
 --7x
-"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
-"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
-"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
+"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
+"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
 nil,nil,
 nil,nil,
-"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
 --8x
 --8x
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
 --Cx
 --Cx
 "xaddBmr","xaddVmr",
 "xaddBmr","xaddVmr",
-"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
-"pinsrwPrWmu","pextrwDrPmu",
-"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
+"pinsrwPrvWmu","pextrwDrPmu",
+"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
 --Dx
 --Dx
-"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
-"paddqPrm","pmullwPrm",
+"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
+"paddqPrvm","pmullwPrvm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
-"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
-"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
+"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
 --Ex
 --Ex
-"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
-"pmulhuwPrm","pmulhwPrm",
+"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
+"pmulhuwPrvm","pmulhwPrvm",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
-"psubsbPrm","psubswPrm","pminswPrm","porPrm",
-"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
+"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
 --Fx
 --Fx
-"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
-"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
-"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
-"paddbPrm","paddwPrm","padddPrm","ud",
+"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
+"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
+"paddbPrvm","paddwPrvm","padddPrvm","ud",
 }
 }
 assert(map_opc2[255] == "ud")
 assert(map_opc2[255] == "ud")
 
 
@@ -208,49 +209,70 @@ assert(map_opc2[255] == "ud")
 local map_opc3 = {
 local map_opc3 = {
 ["38"] = { -- [66] 0f 38 xx
 ["38"] = { -- [66] 0f 38 xx
 --0x
 --0x
-[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
-"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
-"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
-nil,nil,nil,nil,
+[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
+"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
+"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
+"||permilpsXrvm","||permilpdXrvm",nil,nil,
 --1x
 --1x
 "||pblendvbXrma",nil,nil,nil,
 "||pblendvbXrma",nil,nil,nil,
-"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
-nil,nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
+"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 "pabsbPrm","pabswPrm","pabsdPrm",nil,
 --2x
 --2x
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
-"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
-nil,nil,nil,nil,
+"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
+"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
 --3x
 --3x
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
-"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
-"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
-"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
+"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
+"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
 --4x
 --4x
-"||pmulddXrm","||phminposuwXrm",
+"||pmulddXrvm","||phminposuwXrm",nil,nil,
+nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+--5x
+[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
+[0x5a] = "||broadcasti128XrlXm",
+--7x
+[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
+--8x
+[0x8c] = "||pmaskmovXrvVSm",
+[0x8e] = "||pmaskmovVSmXvr",
+--Dx
+[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
 --Fx
 --Fx
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
 },
 },
 
 
 ["3a"] = { -- [66] 0f 3a xx
 ["3a"] = { -- [66] 0f 3a xx
 --0x
 --0x
-[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
-"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
-"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
+"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
+"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
 --1x
 --1x
 nil,nil,nil,nil,
 nil,nil,nil,nil,
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
-nil,nil,nil,nil,nil,nil,nil,nil,
+"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
+nil,nil,nil,nil,
 --2x
 --2x
-"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
+--3x
+[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
 --4x
 --4x
-[0x40] = "||dppsXrmu",
-[0x41] = "||dppdXrmu",
-[0x42] = "||mpsadbwXrmu",
+[0x40] = "||dppsXrvmu",
+[0x41] = "||dppdXrvmu",
+[0x42] = "||mpsadbwXrvmu",
+[0x44] = "||pclmulqdqXrvmu",
+[0x46] = "||perm2i128Xrvmu",
+[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
+[0x4c] = "||pblendvbXrvmb",
 --6x
 --6x
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+[0xdf] = "||aeskeygenassistXrmu",
 },
 },
 }
 }
 
 
@@ -354,17 +376,19 @@ local map_regs = {
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
 	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
   X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
 	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+  Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+	"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
 }
 }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
 
 
 -- Maps for size names.
 -- Maps for size names.
 local map_sz2n = {
 local map_sz2n = {
-  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
 }
 }
 local map_sz2prefix = {
 local map_sz2prefix = {
   B = "byte", W = "word", D = "dword",
   B = "byte", W = "word", D = "dword",
   Q = "qword",
   Q = "qword",
-  M = "qword", X = "xword",
+  M = "qword", X = "xword", Y = "yword",
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
   F = "dword", G = "qword", -- No need for sizes/register names for these two.
 }
 }
 
 
@@ -387,10 +411,13 @@ local function putop(ctx, text, operands)
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
   if ctx.rex then
   if ctx.rex then
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
     local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
-	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
-    if t ~= "" then text = "rex."..t.." "..text end
+	      (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
+	      (ctx.vexl and "l" or "")
+    if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
+    if t ~= "" then text = ctx.rex.."."..t.." "..text
+    elseif ctx.rex == "vex" then text = "v"..text end
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
     ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-    ctx.rex = false
+    ctx.rex = false; ctx.vexl = false; ctx.vexv = false
   end
   end
   if ctx.seg then
   if ctx.seg then
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
     local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +432,7 @@ local function putop(ctx, text, operands)
   end
   end
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
   ctx.mrm = false
   ctx.mrm = false
+  ctx.vexv = false
   ctx.start = pos
   ctx.start = pos
   ctx.imm = nil
   ctx.imm = nil
 end
 end
@@ -413,7 +441,7 @@ end
 local function clearprefixes(ctx)
 local function clearprefixes(ctx)
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
   ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
-  ctx.rex = false; ctx.a32 = false
+  ctx.rex = false; ctx.a32 = false; ctx.vexl = false
 end
 end
 
 
 -- Fallback for incomplete opcodes at the end.
 -- Fallback for incomplete opcodes at the end.
@@ -450,9 +478,9 @@ end
 -- Process pattern string and generate the operands.
 -- Process pattern string and generate the operands.
 local function putpat(ctx, name, pat)
 local function putpat(ctx, name, pat)
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
-  local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+  local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
 
 
-  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+  -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
   for p in gmatch(pat, ".") do
   for p in gmatch(pat, ".") do
     local x = nil
     local x = nil
     if p == "V" or p == "U" then
     if p == "V" or p == "U" then
@@ -467,11 +495,13 @@ local function putpat(ctx, name, pat)
     elseif p == "B" then
     elseif p == "B" then
       sz = "B"
       sz = "B"
       regs = ctx.rex and map_regs.B64 or map_regs.B
       regs = ctx.rex and map_regs.B64 or map_regs.B
-    elseif match(p, "[WDQMXFG]") then
+    elseif match(p, "[WDQMXYFG]") then
       sz = p
       sz = p
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
       regs = map_regs[sz]
     elseif p == "P" then
     elseif p == "P" then
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
       regs = map_regs[sz]
       regs = map_regs[sz]
     elseif p == "S" then
     elseif p == "S" then
       name = name..lower(sz)
       name = name..lower(sz)
@@ -484,6 +514,10 @@ local function putpat(ctx, name, pat)
       local imm = getimm(ctx, pos, 1); if not imm then return end
       local imm = getimm(ctx, pos, 1); if not imm then return end
       x = format("0x%02x", imm)
       x = format("0x%02x", imm)
       pos = pos+1
       pos = pos+1
+    elseif p == "b" then
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = regs[imm/16+1]
+      pos = pos+1
     elseif p == "w" then
     elseif p == "w" then
       local imm = getimm(ctx, pos, 2); if not imm then return end
       local imm = getimm(ctx, pos, 2); if not imm then return end
       x = format("0x%x", imm)
       x = format("0x%x", imm)
@@ -532,7 +566,7 @@ local function putpat(ctx, name, pat)
 	local lo = imm % 0x1000000
 	local lo = imm % 0x1000000
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
       else
       else
-	x = format("0x%08x", imm)
+	x = "0x"..tohex(imm)
       end
       end
     elseif p == "R" then
     elseif p == "R" then
       local r = byte(code, pos-1, pos-1)%8
       local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +650,13 @@ local function putpat(ctx, name, pat)
 	else
 	else
 	  x = "CR"..sp
 	  x = "CR"..sp
 	end
 	end
+      elseif p == "v" then
+	if ctx.vexv then
+	  x = regs[ctx.vexv+1]; ctx.vexv = false
+	end
       elseif p == "y" then x = "DR"..sp
       elseif p == "y" then x = "DR"..sp
       elseif p == "z" then x = "TR"..sp
       elseif p == "z" then x = "TR"..sp
+      elseif p == "l" then vexl = false
       elseif p == "t" then
       elseif p == "t" then
       else
       else
 	error("bad pattern `"..pat.."'")
 	error("bad pattern `"..pat.."'")
@@ -692,7 +731,7 @@ map_act = {
   B = putpat, W = putpat, D = putpat, Q = putpat,
   B = putpat, W = putpat, D = putpat, Q = putpat,
   V = putpat, U = putpat, T = putpat,
   V = putpat, U = putpat, T = putpat,
   M = putpat, X = putpat, P = putpat,
   M = putpat, X = putpat, P = putpat,
-  F = putpat, G = putpat,
+  F = putpat, G = putpat, Y = putpat,
 
 
   -- Collect prefixes.
   -- Collect prefixes.
   [":"] = function(ctx, name, pat)
   [":"] = function(ctx, name, pat)
@@ -753,15 +792,68 @@ map_act = {
 
 
   -- REX prefix.
   -- REX prefix.
   rex = function(ctx, name, pat)
   rex = function(ctx, name, pat)
-    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
     for p in gmatch(pat, ".") do ctx["rex"..p] = true end
-    ctx.rex = true
+    ctx.rex = "rex"
+  end,
+
+  -- VEX prefix.
+  vex = function(ctx, name, pat)
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
+    ctx.rex = "vex"
+    local pos = ctx.pos
+    if ctx.mrm then
+      ctx.mrm = nil
+      pos = pos-1
+    end
+    local b = byte(ctx.code, pos, pos)
+    if not b then return incomplete(ctx) end
+    pos = pos+1
+    if b < 128 then ctx.rexr = true end
+    local m = 1
+    if pat == "3" then
+      m = b%32; b = (b-m)/32
+      local nb = b%2; b = (b-nb)/2
+      if nb == 0 then ctx.rexb = true end
+      local nx = b%2; b = (b-nx)/2
+      if nx == 0 then ctx.rexx = true end
+      b = byte(ctx.code, pos, pos)
+      if not b then return incomplete(ctx) end
+      pos = pos+1
+      if b >= 128 then ctx.rexw = true end
+    end
+    ctx.pos = pos
+    local map
+    if m == 1 then map = map_opc2
+    elseif m == 2 then map = map_opc3["38"]
+    elseif m == 3 then map = map_opc3["3a"]
+    else return unknown(ctx) end
+    local p = b%4; b = (b-p)/4
+    if p == 1 then ctx.o16 = "o16"
+    elseif p == 2 then ctx.rep = "rep"
+    elseif p == 3 then ctx.rep = "repne" end
+    local l = b%2; b = (b-l)/2
+    if l ~= 0 then ctx.vexl = true end
+    ctx.vexv = (-1-b)%16
+    return dispatchmap(ctx, map)
   end,
   end,
 
 
   -- Special case for nop with REX prefix.
   -- Special case for nop with REX prefix.
   nop = function(ctx, name, pat)
   nop = function(ctx, name, pat)
     return dispatch(ctx, ctx.rex and pat or "nop")
     return dispatch(ctx, ctx.rex and pat or "nop")
   end,
   end,
+
+  -- Special case for 0F 77.
+  emms = function(ctx, name, pat)
+    if ctx.rex ~= "vex" then
+      return putop(ctx, "emms")
+    elseif ctx.vexl then
+      ctx.vexl = false
+      return putop(ctx, "zeroall")
+    else
+      return putop(ctx, "zeroupper")
+    end
+  end,
 }
 }
 
 
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
@@ -782,7 +874,7 @@ local function disass_block(ctx, ofs, len)
 end
 end
 
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   local ctx = {}
   ctx.code = code
   ctx.code = code
   ctx.addr = (addr or 0) - 1
   ctx.addr = (addr or 0) - 1
@@ -796,8 +888,8 @@ local function create_(code, addr, out)
   return ctx
   return ctx
 end
 end
 
 
-local function create64_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create64(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.x64 = true
   ctx.x64 = true
   ctx.map1 = map_opc1_64
   ctx.map1 = map_opc1_64
   ctx.aregs = map_regs.Q
   ctx.aregs = map_regs.Q
@@ -805,32 +897,32 @@ local function create64_(code, addr, out)
 end
 end
 
 
 -- Simple API: disassemble code (a string) at address and output via out.
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 end
 
 
-local function disass64_(code, addr, out)
-  create64_(code, addr, out):disass()
+local function disass64(code, addr, out)
+  create64(code, addr, out):disass()
 end
 end
 
 
 -- Return register name for RID.
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 8 then return map_regs.D[r+1] end
   if r < 8 then return map_regs.D[r+1] end
   return map_regs.X[r-7]
   return map_regs.X[r-7]
 end
 end
 
 
-local function regname64_(r)
+local function regname64(r)
   if r < 16 then return map_regs.Q[r+1] end
   if r < 16 then return map_regs.Q[r+1] end
   return map_regs.X[r-15]
   return map_regs.X[r-15]
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-create = create_
-create64 = create64_
-disass = disass_
-disass64 = disass64_
-regname = regname_
-regname64 = regname64_
+return {
+  create = create,
+  create64 = create64,
+  disass = disass,
+  disass64 = disass64,
+  regname = regname,
+  regname64 = regname64
+}
 
 

+ 20 - 12
Source/ThirdParty/LuaJIT/src/jit/dump.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- LuaJIT compiler dump module.
 -- LuaJIT compiler dump module.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -55,7 +55,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20003, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local bit = require("bit")
 local bit = require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
 local sub, gsub, format = string.sub, string.gsub, string.format
 local sub, gsub, format = string.sub, string.gsub, string.format
 local byte, char, rep = string.byte, string.char, string.rep
 local byte, char, rep = string.byte, string.char, string.rep
 local type, tostring = type, tostring
 local type, tostring = type, tostring
@@ -91,6 +91,7 @@ local function fillsymtab_tr(tr, nexit)
   end
   end
   for i=0,nexit-1 do
   for i=0,nexit-1 do
     local addr = traceexitstub(tr, i)
     local addr = traceexitstub(tr, i)
+    if addr < 0 then addr = addr + 2^32 end
     t[addr] = tostring(i)
     t[addr] = tostring(i)
   end
   end
   local addr = traceexitstub(tr, nexit)
   local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
     local ircall = vmdef.ircall
     local ircall = vmdef.ircall
     for i=0,#ircall do
     for i=0,#ircall do
       local addr = ircalladdr(i)
       local addr = ircalladdr(i)
-      if addr ~= 0 then t[addr] = ircall[i] end
+      if addr ~= 0 then
+	if addr < 0 then addr = addr + 2^32 end
+	t[addr] = ircall[i]
+      end
     end
     end
   end
   end
   if nexitsym == 1000000 then -- Per-trace exit stubs.
   if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
 	nexit = 1000000
 	nexit = 1000000
 	break
 	break
       end
       end
+      if addr < 0 then addr = addr + 2^32 end
       t[addr] = tostring(i)
       t[addr] = tostring(i)
     end
     end
     nexitsym = nexit
     nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
   local mcode, addr, loop = tracemc(tr)
   local mcode, addr, loop = tracemc(tr)
   if not mcode then return end
   if not mcode then return end
   if not disass then disass = require("jit.dis_"..jit.arch) end
   if not disass then disass = require("jit.dis_"..jit.arch) end
+  if addr < 0 then addr = addr + 2^32 end
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   local ctx = disass.create(mcode, addr, dumpwrite)
   local ctx = disass.create(mcode, addr, dumpwrite)
   ctx.hexdump = 0
   ctx.hexdump = 0
@@ -270,8 +276,7 @@ local litname = {
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
     local s = irtype[band(mode, 31)]
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
     s = irtype[band(shr(mode, 5), 31)].."."..s
-    if band(mode, 0x400) ~= 0 then s = s.." trunc"
-    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    if band(mode, 0x800) ~= 0 then s = s.." sext" end
     local c = shr(mode, 14)
     local c = shr(mode, 14)
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     t[mode] = s
     t[mode] = s
@@ -280,6 +285,8 @@ local litname = {
   ["FLOAD "] = vmdef.irfield,
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
   ["FPMATH"] = vmdef.irfpm,
+  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 }
 
 
 local function ctlsub(c)
 local function ctlsub(c)
@@ -564,6 +571,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
     end
     end
     if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
     if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
   else
   else
+    if what == "flush" then symtab, nexitsym = {}, 0 end
     out:write("---- TRACE ", what, "\n\n")
     out:write("---- TRACE ", what, "\n\n")
   end
   end
   out:flush()
   out:flush()
@@ -607,7 +615,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
       end
       end
     else
     else
       for i=1,ngpr do
       for i=1,ngpr do
-	out:write(format(" %08x", regs[i]))
+	out:write(" ", tohex(regs[i]))
 	if i % 8 == 0 then out:write("\n") end
 	if i % 8 == 0 then out:write("\n") end
       end
       end
     end
     end
@@ -691,9 +699,9 @@ local function dumpon(opt, outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
 

+ 310 - 0
Source/ThirdParty/LuaJIT/src/jit/p.lua

@@ -0,0 +1,310 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module is a simple command line interface to the built-in
+-- low-overhead profiler of LuaJIT.
+--
+-- The lower-level API of the profiler is accessible via the "jit.profile"
+-- module or the luaJIT_profile_* C API.
+--
+-- Example usage:
+--
+--   luajit -jp myapp.lua
+--   luajit -jp=s myapp.lua
+--   luajit -jp=-s myapp.lua
+--   luajit -jp=vl myapp.lua
+--   luajit -jp=G,profile.txt myapp.lua
+--
+-- The following dump features are available:
+--
+--   f  Stack dump: function name, otherwise module:line. Default mode.
+--   F  Stack dump: ditto, but always prepend module.
+--   l  Stack dump: module:line.
+--   <number> stack dump depth (callee < caller). Default: 1.
+--   -<number> Inverse stack dump depth (caller > callee).
+--   s  Split stack dump after first stack level. Implies abs(depth) >= 2.
+--   p  Show full path for module names.
+--   v  Show VM states. Can be combined with stack dumps, e.g. vf or fv.
+--   z  Show zones. Can be combined with stack dumps, e.g. zf or fz.
+--   r  Show raw sample counts. Default: show percentages.
+--   a  Annotate excerpts from source code files.
+--   A  Annotate complete source code files.
+--   G  Produce raw output suitable for graphical tools (e.g. flame graphs).
+--   m<number> Minimum sample percentage to be shown. Default: 3.
+--   i<number> Sampling interval in milliseconds. Default: 10.
+--
+----------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+local profile = require("jit.profile")
+local vmdef = require("jit.vmdef")
+local math = math
+local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
+local sort, format = table.sort, string.format
+local stdout = io.stdout
+local zone -- Load jit.zone module on demand.
+
+-- Output file handle.
+local out
+
+------------------------------------------------------------------------------
+
+local prof_ud
+local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
+local prof_ann, prof_count1, prof_count2, prof_samples
+
+local map_vmmode = {
+  N = "Compiled",
+  I = "Interpreted",
+  C = "C code",
+  G = "Garbage Collector",
+  J = "JIT Compiler",
+}
+
+-- Profiler callback.
+local function prof_cb(th, samples, vmmode)
+  prof_samples = prof_samples + samples
+  local key_stack, key_stack2, key_state
+  -- Collect keys for sample.
+  if prof_states then
+    if prof_states == "v" then
+      key_state = map_vmmode[vmmode] or vmmode
+    else
+      key_state = zone:get() or "(none)"
+    end
+  end
+  if prof_fmt then
+    key_stack = profile.dumpstack(th, prof_fmt, prof_depth)
+    key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x)
+      return vmdef.ffnames[tonumber(x)]
+    end)
+    if prof_split == 2 then
+      local k1, k2 = key_stack:match("(.-) [<>] (.*)")
+      if k2 then key_stack, key_stack2 = k1, k2 end
+    elseif prof_split == 3 then
+      key_stack2 = profile.dumpstack(th, "l", 1)
+    end
+  end
+  -- Order keys.
+  local k1, k2
+  if prof_split == 1 then
+    if key_state then
+      k1 = key_state
+      if key_stack then k2 = key_stack end
+    end
+  elseif key_stack then
+    k1 = key_stack
+    if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end
+  end
+  -- Coalesce samples in one or two levels.
+  if k1 then
+    local t1 = prof_count1
+    t1[k1] = (t1[k1] or 0) + samples
+    if k2 then
+      local t2 = prof_count2
+      local t3 = t2[k1]
+      if not t3 then t3 = {}; t2[k1] = t3 end
+      t3[k2] = (t3[k2] or 0) + samples
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Show top N list.
+local function prof_top(count1, count2, samples, indent)
+  local t, n = {}, 0
+  for k, v in pairs(count1) do
+    n = n + 1
+    t[n] = k
+  end
+  sort(t, function(a, b) return count1[a] > count1[b] end)
+  for i=1,n do
+    local k = t[i]
+    local v = count1[k]
+    local pct = floor(v*100/samples + 0.5)
+    if pct < prof_min then break end
+    if not prof_raw then
+      out:write(format("%s%2d%%  %s\n", indent, pct, k))
+    elseif prof_raw == "r" then
+      out:write(format("%s%5d  %s\n", indent, v, k))
+    else
+      out:write(format("%s %d\n", k, v))
+    end
+    if count2 then
+      local r = count2[k]
+      if r then
+	prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and "  -- " or
+			    (prof_depth < 0 and "  -> " or "  <- "))
+      end
+    end
+  end
+end
+
+-- Annotate source code
+local function prof_annotate(count1, samples)
+  local files = {}
+  local ms = 0
+  for k, v in pairs(count1) do
+    local pct = floor(v*100/samples + 0.5)
+    ms = math.max(ms, v)
+    if pct >= prof_min then
+      local file, line = k:match("^(.*):(%d+)$")
+      local fl = files[file]
+      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
+      line = tonumber(line)
+      fl[line] = prof_raw and v or pct
+    end
+  end
+  sort(files)
+  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
+  if prof_raw then
+    local n = math.max(5, math.ceil(math.log10(ms)))
+    fmtv = "%"..n.."d | %s\n"
+    fmtn = (" "):rep(n).." | %s\n"
+  end
+  local ann = prof_ann
+  for _, file in ipairs(files) do
+    local f0 = file:byte()
+    if f0 == 40 or f0 == 91 then
+      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
+      break
+    end
+    local fp, err = io.open(file)
+    if not fp then
+      out:write(format("====== ERROR: %s: %s\n", file, err))
+      break
+    end
+    out:write(format("\n====== %s ======\n", file))
+    local fl = files[file]
+    local n, show = 1, false
+    if ann ~= 0 then
+      for i=1,ann do
+	if fl[i] then show = true; out:write("@@ 1 @@\n"); break end
+      end
+    end
+    for line in fp:lines() do
+      if line:byte() == 27 then
+	out:write("[Cannot annotate bytecode file]\n")
+	break
+      end
+      local v = fl[n]
+      if ann ~= 0 then
+	local v2 = fl[n+ann]
+	if show then
+	  if v2 then show = n+ann elseif v then show = n
+	  elseif show+ann < n then show = false end
+	elseif v2 then
+	  show = n+ann
+	  out:write(format("@@ %d @@\n", n))
+	end
+	if not show then goto next end
+      end
+      if v then
+	out:write(format(fmtv, v, line))
+      else
+	out:write(format(fmtn, line))
+      end
+    ::next::
+      n = n + 1
+    end
+    fp:close()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Finish profiling and dump result.
+local function prof_finish()
+  if prof_ud then
+    profile.stop()
+    local samples = prof_samples
+    if samples == 0 then
+      if prof_raw ~= true then out:write("[No samples collected]\n") end
+      return
+    end
+    if prof_ann then
+      prof_annotate(prof_count1, samples)
+    else
+      prof_top(prof_count1, prof_count2, samples, "")
+    end
+    prof_count1 = nil
+    prof_count2 = nil
+    prof_ud = nil
+  end
+end
+
+-- Start profiling.
+local function prof_start(mode)
+  local interval = ""
+  mode = mode:gsub("i%d*", function(s) interval = s; return "" end)
+  prof_min = 3
+  mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end)
+  prof_depth = 1
+  mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end)
+  local m = {}
+  for c in mode:gmatch(".") do m[c] = c end
+  prof_states = m.z or m.v
+  if prof_states == "z" then zone = require("jit.zone") end
+  local scope = m.l or m.f or m.F or (prof_states and "" or "f")
+  local flags = (m.p or "")
+  prof_raw = m.r
+  if m.s then
+    prof_split = 2
+    if prof_depth == -1 or m["-"] then prof_depth = -2
+    elseif prof_depth == 1 then prof_depth = 2 end
+  elseif mode:find("[fF].*l") then
+    scope = "l"
+    prof_split = 3
+  else
+    prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
+  end
+  prof_ann = m.A and 0 or (m.a and 3)
+  if prof_ann then
+    scope = "l"
+    prof_fmt = "pl"
+    prof_split = 0
+    prof_depth = 1
+  elseif m.G and scope ~= "" then
+    prof_fmt = flags..scope.."Z;"
+    prof_depth = -100
+    prof_raw = true
+    prof_min = 0
+  elseif scope == "" then
+    prof_fmt = false
+  else
+    local sc = prof_split == 3 and m.f or m.F or scope
+    prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
+  end
+  prof_count1 = {}
+  prof_count2 = {}
+  prof_samples = 0
+  profile.start(scope:lower()..interval, prof_cb)
+  prof_ud = newproxy(true)
+  getmetatable(prof_ud).__gc = prof_finish
+end
+
+------------------------------------------------------------------------------
+
+local function start(mode, outfile)
+  if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stdout
+  end
+  prof_start(mode or "f")
+end
+
+-- Public module functions.
+return {
+  start = start, -- For -j command line option.
+  stop = prof_finish
+}
+

+ 10 - 7
Source/ThirdParty/LuaJIT/src/jit/v.lua

@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 -- Verbose mode of the LuaJIT compiler.
 -- Verbose mode of the LuaJIT compiler.
 --
 --
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 ----------------------------------------------------------------------------
 --
 --
@@ -59,7 +59,7 @@
 
 
 -- Cache some library functions and objects.
 -- Cache some library functions and objects.
 local jit = require("jit")
 local jit = require("jit")
-assert(jit.version_num == 20003, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local vmdef = require("jit.vmdef")
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
       if ltype == "interpreter" then
       if ltype == "interpreter" then
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	  tr, startex, startloc))
 	  tr, startex, startloc))
+      elseif ltype == "stitch" then
+	out:write(format("[TRACE %3s %s%s %s %s]\n",
+	  tr, startex, startloc, ltype, fmtfunc(func, pc)))
       elseif link == tr or link == 0 then
       elseif link == tr or link == 0 then
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	  tr, startex, startloc, ltype))
 	  tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
 end
 end
 
 
 -- Public module functions.
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
 

+ 45 - 0
Source/ThirdParty/LuaJIT/src/jit/zone.lua

@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler zones.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module implements a simple hierarchical zone model.
+--
+-- Example usage:
+--
+--   local zone = require("jit.zone")
+--   zone("AI")
+--   ...
+--     zone("A*")
+--     ...
+--     print(zone:get()) --> "A*"
+--     ...
+--     zone()
+--   ...
+--   print(zone:get()) --> "AI"
+--   ...
+--   zone()
+--
+----------------------------------------------------------------------------
+
+local remove = table.remove
+
+return setmetatable({
+  flush = function(t)
+    for i=#t,1,-1 do t[i] = nil end
+  end,
+  get = function(t)
+    return t[#t]
+  end
+}, {
+  __call = function(t, zone)
+    if zone then
+      t[#t+1] = zone
+    else
+      return (assert(remove(t), "empty zone stack"))
+    end
+  end
+})
+

+ 2 - 2
Source/ThirdParty/LuaJIT/src/lib_aux.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Auxiliary library for the Lua/C API.
 ** Auxiliary library for the Lua/C API.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -302,7 +302,7 @@ static int panic(lua_State *L)
 
 
 #ifdef LUAJIT_USE_SYSMALLOC
 #ifdef LUAJIT_USE_SYSMALLOC
 
 
-#if LJ_64
+#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
 #error "Must use builtin allocator for 64 bit target"
 #error "Must use builtin allocator for 64 bit target"
 #endif
 #endif
 
 

+ 34 - 53
Source/ThirdParty/LuaJIT/src/lib_base.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Base and coroutine library.
 ** Base and coroutine library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -32,6 +32,7 @@
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_char.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* -- Base library: checks ------------------------------------------------ */
 /* -- Base library: checks ------------------------------------------------ */
@@ -86,10 +87,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
   cTValue *mo = lj_meta_lookup(L, o, mm);
   cTValue *mo = lj_meta_lookup(L, o, mm);
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
     L->top = o+1;  /* Only keep one argument. */
     L->top = o+1;  /* Only keep one argument. */
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
     return FFH_TAILCALL;
   } else {
   } else {
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+    if (LJ_FR2) { copyTV(L, o-1, o); o--; }
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     return FFH_RES(3);
     return FFH_RES(3);
@@ -100,7 +102,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
 #endif
 #endif
 
 
 LJLIB_PUSH(lastcl)
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(pairs)
+LJLIB_ASM(pairs)		LJLIB_REC(xpairs 0)
 {
 {
   return ffh_pairs(L, MM_pairs);
   return ffh_pairs(L, MM_pairs);
 }
 }
@@ -113,7 +115,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux)	LJLIB_REC(.)
 }
 }
 
 
 LJLIB_PUSH(lastcl)
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(ipairs)		LJLIB_REC(.)
+LJLIB_ASM(ipairs)		LJLIB_REC(xpairs 1)
 {
 {
   return ffh_pairs(L, MM_ipairs);
   return ffh_pairs(L, MM_ipairs);
 }
 }
@@ -131,11 +133,11 @@ LJLIB_ASM(setmetatable)		LJLIB_REC(.)
     lj_err_caller(L, LJ_ERR_PROTMT);
     lj_err_caller(L, LJ_ERR_PROTMT);
   setgcref(t->metatable, obj2gco(mt));
   setgcref(t->metatable, obj2gco(mt));
   if (mt) { lj_gc_objbarriert(L, t, mt); }
   if (mt) { lj_gc_objbarriert(L, t, mt); }
-  settabV(L, L->base-1, t);
+  settabV(L, L->base-1-LJ_FR2, t);
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
-LJLIB_CF(getfenv)
+LJLIB_CF(getfenv)		LJLIB_REC(.)
 {
 {
   GCfunc *fn;
   GCfunc *fn;
   cTValue *o = L->base;
   cTValue *o = L->base;
@@ -144,6 +146,7 @@ LJLIB_CF(getfenv)
     o = lj_debug_frame(L, level, &level);
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   }
   fn = &gcval(o)->fn;
   fn = &gcval(o)->fn;
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +168,7 @@ LJLIB_CF(setfenv)
     o = lj_debug_frame(L, level, &level);
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   }
   fn = &gcval(o)->fn;
   fn = &gcval(o)->fn;
   if (!isluafunc(fn))
   if (!isluafunc(fn))
@@ -257,7 +261,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   if (base == 10) {
   if (base == 10) {
     TValue *o = lj_lib_checkany(L, 1);
     TValue *o = lj_lib_checkany(L, 1);
     if (lj_strscan_numberobj(o)) {
     if (lj_strscan_numberobj(o)) {
-      copyTV(L, L->base-1, o);
+      copyTV(L, L->base-1-LJ_FR2, o);
       return FFH_RES(1);
       return FFH_RES(1);
     }
     }
 #if LJ_HASFFI
 #if LJ_HASFFI
@@ -270,11 +274,11 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	  int32_t i;
 	  int32_t i;
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
-	  setintV(L->base-1, i);
+	  setintV(L->base-1-LJ_FR2, i);
 	  return FFH_RES(1);
 	  return FFH_RES(1);
 	}
 	}
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
-		       (uint8_t *)&(L->base-1)->n, o, 0);
+		       (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
 	return FFH_RES(1);
 	return FFH_RES(1);
       }
       }
     }
     }
@@ -290,45 +294,29 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
       while (lj_char_isspace((unsigned char)(*ep))) ep++;
       while (lj_char_isspace((unsigned char)(*ep))) ep++;
       if (*ep == '\0') {
       if (*ep == '\0') {
 	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
 	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-	  setintV(L->base-1, (int32_t)ul);
+	  setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	else
 	else
-	  setnumV(L->base-1, (lua_Number)ul);
+	  setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
 	return FFH_RES(1);
 	return FFH_RES(1);
       }
       }
     }
     }
   }
   }
-  setnilV(L->base-1);
+  setnilV(L->base-1-LJ_FR2);
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
-LJLIB_PUSH("nil")
-LJLIB_PUSH("false")
-LJLIB_PUSH("true")
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 {
 {
   TValue *o = lj_lib_checkany(L, 1);
   TValue *o = lj_lib_checkany(L, 1);
   cTValue *mo;
   cTValue *mo;
   L->top = o+1;  /* Only keep one argument. */
   L->top = o+1;  /* Only keep one argument. */
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
     return FFH_TAILCALL;
-  } else {
-    GCstr *s;
-    if (tvisnumber(o)) {
-      s = lj_str_fromnumber(L, o);
-    } else if (tvispri(o)) {
-      s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
-    } else {
-      if (tvisfunc(o) && isffunc(funcV(o)))
-	lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
-      else
-	lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
-      /* Note: lua_pushfstring calls the GC which may invalidate o. */
-      s = strV(L->top-1);
-    }
-    setstrV(L, L->base-1, s);
-    return FFH_RES(1);
   }
   }
+  lj_gc_check(L);
+  setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
+  return FFH_RES(1);
 }
 }
 
 
 /* -- Base library: throw and catch errors -------------------------------- */
 /* -- Base library: throw and catch errors -------------------------------- */
@@ -440,20 +428,20 @@ LJLIB_CF(dofile)
 
 
 LJLIB_CF(gcinfo)
 LJLIB_CF(gcinfo)
 {
 {
-  setintV(L->top++, (G(L)->gc.total >> 10));
+  setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
   return 1;
   return 1;
 }
 }
 
 
 LJLIB_CF(collectgarbage)
 LJLIB_CF(collectgarbage)
 {
 {
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
   int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
-    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
   int32_t data = lj_lib_optint(L, 2, 0);
   int32_t data = lj_lib_optint(L, 2, 0);
   if (opt == LUA_GCCOUNT) {
   if (opt == LUA_GCCOUNT) {
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
     setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
   } else {
   } else {
     int res = lua_gc(L, opt, data);
     int res = lua_gc(L, opt, data);
-    if (opt == LUA_GCSTEP)
+    if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
       setboolV(L->top, res);
       setboolV(L->top, res);
     else
     else
       setintV(L->top, res);
       setintV(L->top, res);
@@ -506,21 +494,13 @@ LJLIB_CF(print)
   }
   }
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {
+    cTValue *o = &L->base[i];
+    char buf[STRFMT_MAXBUF_NUM];
     const char *str;
     const char *str;
     size_t size;
     size_t size;
-    cTValue *o = &L->base[i];
-    if (shortcut && tvisstr(o)) {
-      str = strVdata(o);
-      size = strV(o)->len;
-    } else if (shortcut && tvisint(o)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(o));
-      size = (size_t)(buf+LJ_STR_INTBUF-p);
-      str = p;
-    } else if (shortcut && tvisnum(o)) {
-      char buf[LJ_STR_NUMBUF];
-      size = lj_str_bufnum(buf, o);
-      str = buf;
+    MSize len;
+    if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
+      size = len;
     } else {
     } else {
       copyTV(L, L->top+1, o);
       copyTV(L, L->top+1, o);
       copyTV(L, L->top, L->top-1);
       copyTV(L, L->top, L->top-1);
@@ -558,7 +538,7 @@ LJLIB_CF(coroutine_status)
   if (co == L) s = "running";
   if (co == L) s = "running";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status != 0) s = "dead";
   else if (co->status != 0) s = "dead";
-  else if (co->base > tvref(co->stack)+1) s = "normal";
+  else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
   else if (co->top == co->base) s = "dead";
   else if (co->top == co->base) s = "dead";
   else s = "suspended";
   else s = "suspended";
   lua_pushstring(L, s);
   lua_pushstring(L, s);
@@ -600,8 +580,8 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
       (co->status == 0 && co->top == co->base)) {
       (co->status == 0 && co->top == co->base)) {
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     if (wrap) lj_err_caller(L, em);
     if (wrap) lj_err_caller(L, em);
-    setboolV(L->base-1, 0);
-    setstrV(L, L->base, lj_err_str(L, em));
+    setboolV(L->base-1-LJ_FR2, 0);
+    setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
     return FFH_RES(2);
   }
   }
   lj_state_growstack(co, (MSize)(L->top - L->base));
   lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -642,9 +622,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
 
 
 LJLIB_CF(coroutine_wrap)
 LJLIB_CF(coroutine_wrap)
 {
 {
+  GCfunc *fn;
   lj_cf_coroutine_create(L);
   lj_cf_coroutine_create(L);
-  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
-  setpc_wrap_aux(L, funcV(L->top-1));
+  fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, fn);
   return 1;
   return 1;
 }
 }
 
 

+ 121 - 15
Source/ThirdParty/LuaJIT/src/lib_bit.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bit manipulation library.
 ** Bit manipulation library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lib_bit_c
 #define lib_bit_c
@@ -12,26 +12,99 @@
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_bit
 #define LJLIB_MODULE_bit
 
 
-LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
+#if LJ_HASFFI
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
 {
 {
+  GCcdata *cd = lj_cdata_new_(L, id, 8);
+  *(uint64_t *)cdataptr(cd) = x;
+  setcdataV(L, L->base-1-LJ_FR2, cd);
+  return FFH_RES(1);
+}
+#else
+static int32_t bit_checkbit(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;
+  if (!(o < L->top && lj_strscan_numberobj(o)))
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);
+    return i;
+  }
+}
+#endif
+
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
+  return FFH_RES(1);
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, ~x) : FFH_RETRY;
+#else
   lj_lib_checknumber(L, 1);
   lj_lib_checknumber(L, 1);
   return FFH_RETRY;
   return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
 }
 }
-LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
 
 
 LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
 LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
 {
 {
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+  if (id) {
+    x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
+    return bit_result64(L, id, x);
+  }
+  if (id2) setintV(L->base+1, sh);
+  return FFH_RETRY;
+#else
   lj_lib_checknumber(L, 1);
   lj_lib_checknumber(L, 1);
-  lj_lib_checkbit(L, 2);
+  bit_checkbit(L, 2);
   return FFH_RETRY;
   return FFH_RETRY;
+#endif
 }
 }
 LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
 LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
 LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
 LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
 
 
 LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
 LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
 {
 {
+#if LJ_HASFFI
+  CTypeID id = 0;
+  TValue *o = L->base, *top = L->top;
+  int i = 0;
+  do { lj_carith_check64(L, ++i, &id); } while (++o < top);
+  if (id) {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_get(cts, id);
+    int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
+    uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
+    o = L->base;
+    do {
+      lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
+      if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+    } while (++o < top);
+    return bit_result64(L, id, y);
+  }
+  return FFH_RETRY;
+#else
   int i = 0;
   int i = 0;
   do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
   do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
   return FFH_RETRY;
   return FFH_RETRY;
+#endif
 }
 }
 LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
 LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
 LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
 LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-LJLIB_CF(bit_tohex)
+LJLIB_CF(bit_tohex)		LJLIB_REC(.)
 {
 {
-  uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
-  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
-  const char *hexdigits = "0123456789abcdef";
-  char buf[8];
-  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
-  if (n > 8) n = 8;
-  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
-  lua_pushlstring(L, buf, (size_t)n);
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t b = lj_carith_check64(L, 1, &id);
+  int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
+				  (int32_t)lj_carith_check64(L, 2, &id2);
+#else
+  uint32_t b = (uint32_t)bit_checkbit(L, 1);
+  int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
+#endif
+  SBuf *sb = lj_buf_tmp_(L);
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+#if LJ_HASFFI
+  if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
+#else
+  if (n < 8) b &= (1u << 4*n)-1;
+#endif
+  sb = lj_strfmt_putfxint(sb, sf, b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 

+ 2 - 2
Source/ThirdParty/LuaJIT/src/lib_debug.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Debug library.
 ** Debug library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
   return 1;
   return 1;
 }
 }
 
 
-LJLIB_CF(debug_getmetatable)
+LJLIB_CF(debug_getmetatable)	LJLIB_REC(.)
 {
 {
   lj_lib_checkany(L, 1);
   lj_lib_checkany(L, 1);
   if (!lua_getmetatable(L, 1)) {
   if (!lua_getmetatable(L, 1)) {

+ 35 - 16
Source/ThirdParty/LuaJIT/src/lib_ffi.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI library.
 ** FFI library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lib_ffi_c
 #define lib_ffi_c
@@ -29,6 +29,7 @@
 #include "lj_ccall.h"
 #include "lj_ccall.h"
 #include "lj_ccallback.h"
 #include "lj_ccallback.h"
 #include "lj_clib.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
@@ -136,7 +137,8 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
 	return 0;
 	return 0;
       }
       }
     }
     }
-    tv = L->top-1;
+    copyTV(L, base, L->top);
+    tv = L->top-1-LJ_FR2;
   }
   }
   return lj_meta_tailcall(L, tv);
   return lj_meta_tailcall(L, tv);
 }
 }
@@ -317,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
       }
       }
     }
     }
   }
   }
-  lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+  lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
 checkgc:
 checkgc:
   lj_gc_check(L);
   lj_gc_check(L);
   return 1;
   return 1;
@@ -503,10 +505,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
   }
   }
   if (sz == CTSIZE_INVALID)
   if (sz == CTSIZE_INVALID)
     lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
     lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
-  if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
-    cd = lj_cdata_new(cts, id, sz);
-  else
-    cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+  cd = lj_cdata_newx(cts, id, sz, info);
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
@@ -557,6 +556,31 @@ LJLIB_CF(ffi_typeof)	LJLIB_REC(.)
   return 1;
   return 1;
 }
 }
 
 
+/* Internal and unsupported API. */
+LJLIB_CF(ffi_typeinfo)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = (CTypeID)ffi_checkint(L, 1);
+  if (id > 0 && id < cts->top) {
+    CType *ct = ctype_get(cts, id);
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
+    if (ct->size != CTSIZE_INVALID)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
+    if (ct->sib)
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+    if (gcref(ct->name)) {
+      GCstr *s = gco2str(gcref(ct->name));
+      setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+    }
+    lj_gc_check(L);
+    return 1;
+  }
+  return 0;
+}
+
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 {
 {
   CTState *cts = ctype_cts(L);
   CTState *cts = ctype_cts(L);
@@ -724,6 +748,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
 #endif
 #endif
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
+#if LJ_GC64
+  case H_(9e89d2c9,13c83c92): b = 1; break;  /* gc64 */
+#endif
   default:
   default:
     break;
     break;
   }
   }
@@ -767,19 +794,11 @@ LJLIB_CF(ffi_gc)	LJLIB_REC(.)
   GCcdata *cd = ffi_checkcdata(L, 1);
   GCcdata *cd = ffi_checkcdata(L, 1);
   TValue *fin = lj_lib_checkany(L, 2);
   TValue *fin = lj_lib_checkany(L, 2);
   CTState *cts = ctype_cts(L);
   CTState *cts = ctype_cts(L);
-  GCtab *t = cts->finalizer;
   CType *ct = ctype_raw(cts, cd->ctypeid);
   CType *ct = ctype_raw(cts, cd->ctypeid);
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
 	ctype_isrefarray(ct->info)))
 	ctype_isrefarray(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  if (gcref(t->metatable)) {  /* Update finalizer table, if still enabled. */
-    copyTV(L, lj_tab_set(L, t, L->base), fin);
-    lj_gc_anybarriert(L, t);
-    if (!tvisnil(fin))
-      cd->marked |= LJ_GC_CDATA_FIN;
-    else
-      cd->marked &= ~LJ_GC_CDATA_FIN;
-  }
+  lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
   L->top = L->base+1;  /* Pass through the cdata object. */
   L->top = L->base+1;  /* Pass through the cdata object. */
   return 1;
   return 1;
 }
 }

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lib_init.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Library initialization.
 ** Library initialization.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major parts taken verbatim from the Lua interpreter.
 ** Major parts taken verbatim from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

+ 26 - 24
Source/ThirdParty/LuaJIT/src/lib_io.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** I/O library.
 ** I/O library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -19,8 +19,10 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_state.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
   IOFileUD *iof = io_file_new(L);
   IOFileUD *iof = io_file_new(L);
   iof->fp = fopen(fname, mode);
   iof->fp = fopen(fname, mode);
   if (iof->fp == NULL)
   if (iof->fp == NULL)
-    luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+    luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
   return iof;
   return iof;
 }
 }
 
 
@@ -97,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
     int stat = -1;
     int stat = -1;
 #if LJ_TARGET_POSIX
 #if LJ_TARGET_POSIX
     stat = pclose(iof->fp);
     stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
     stat = _pclose(iof->fp);
     stat = _pclose(iof->fp);
 #else
 #else
     lua_assert(0);
     lua_assert(0);
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   char *buf;
   char *buf;
   for (;;) {
   for (;;) {
-    buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    buf = lj_buf_tmp(L, m);
     if (fgets(buf+n, m-n, fp) == NULL) break;
     if (fgets(buf+n, m-n, fp) == NULL) break;
     n += (MSize)strlen(buf+n);
     n += (MSize)strlen(buf+n);
     ok |= n;
     ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
 {
 {
   MSize m, n;
   MSize m, n;
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     if (n != m) {
     if (n != m) {
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 {
 {
   if (m) {
   if (m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     MSize n = (MSize)fread(buf, 1, m, fp);
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     lj_gc_check(L);
     lj_gc_check(L);
@@ -230,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
   cTValue *tv;
   cTValue *tv;
   int status = 1;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
   for (tv = L->base+start; tv < L->top; tv++) {
-    if (tvisstr(tv)) {
-      MSize len = strV(tv)->len;
-      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
-    } else if (tvisint(tv)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(tv));
-      size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
-      status = status && (fwrite(p, 1, len, fp) == len);
-    } else if (tvisnum(tv)) {
-      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
-    } else {
+    char buf[STRFMT_MAXBUF_NUM];
+    MSize len;
+    const char *p = lj_strfmt_wstrnum(buf, tv, &len);
+    if (!p)
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
-    }
+    status = status && (fwrite(p, 1, len, fp) == len);
   }
   }
   if (LJ_52 && status) {
   if (LJ_52 && status) {
     L->top = L->base+1;
     L->top = L->base+1;
@@ -278,6 +273,15 @@ static int io_file_iter(lua_State *L)
   return n;
   return n;
 }
 }
 
 
+static int io_file_lines(lua_State *L)
+{
+  int n = (int)(L->top - L->base);
+  if (n > LJ_MAX_UPVAL)
+    lj_err_caller(L, LJ_ERR_UNPACK);
+  lua_pushcclosure(L, io_file_iter, n);
+  return 1;
+}
+
 /* -- I/O file methods ---------------------------------------------------- */
 /* -- I/O file methods ---------------------------------------------------- */
 
 
 #define LJLIB_MODULE_io_method
 #define LJLIB_MODULE_io_method
@@ -361,8 +365,7 @@ LJLIB_CF(io_method_setvbuf)
 LJLIB_CF(io_method_lines)
 LJLIB_CF(io_method_lines)
 {
 {
   io_tofile(L);
   io_tofile(L);
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 }
 
 
 LJLIB_CF(io_method___gc)
 LJLIB_CF(io_method___gc)
@@ -405,7 +408,7 @@ LJLIB_CF(io_open)
 
 
 LJLIB_CF(io_popen)
 LJLIB_CF(io_popen)
 {
 {
-#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   GCstr *s = lj_lib_optstr(L, 2);
   GCstr *s = lj_lib_optstr(L, 2);
   const char *mode = s ? strdata(s) : "r";
   const char *mode = s ? strdata(s) : "r";
@@ -426,7 +429,7 @@ LJLIB_CF(io_popen)
 LJLIB_CF(io_tmpfile)
 LJLIB_CF(io_tmpfile)
 {
 {
   IOFileUD *iof = io_file_new(L);
   IOFileUD *iof = io_file_new(L);
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
   iof->fp = NULL; errno = ENOSYS;
   iof->fp = NULL; errno = ENOSYS;
 #else
 #else
   iof->fp = tmpfile();
   iof->fp = tmpfile();
@@ -492,8 +495,7 @@ LJLIB_CF(io_lines)
   } else {  /* io.lines() iterates over stdin. */
   } else {  /* io.lines() iterates over stdin. */
     setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
     setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
   }
   }
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 }
 
 
 LJLIB_CF(io_type)
 LJLIB_CF(io_type)

+ 130 - 26
Source/ThirdParty/LuaJIT/src/lib_jit.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** JIT library.
 ** JIT library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lib_jit_c
 #define lib_jit_c
@@ -10,13 +10,17 @@
 #include "lauxlib.h"
 #include "lauxlib.h"
 #include "lualib.h"
 #include "lualib.h"
 
 
-#include "lj_arch.h"
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_debug.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_state.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #if LJ_HASJIT
 #if LJ_HASJIT
 #include "lj_ir.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_jit.h"
@@ -24,6 +28,7 @@
 #include "lj_iropt.h"
 #include "lj_iropt.h"
 #include "lj_target.h"
 #include "lj_target.h"
 #endif
 #endif
+#include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_vmevent.h"
 #include "lj_vmevent.h"
@@ -279,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
 /* Names of link types. ORDER LJ_TRLINK */
 /* Names of link types. ORDER LJ_TRLINK */
 static const char *const jit_trlinkname[] = {
 static const char *const jit_trlinkname[] = {
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
-  "interpreter", "return"
+  "interpreter", "return", "stitch"
 };
 };
 
 
 /* local info = jit.util.traceinfo(tr) */
 /* local info = jit.util.traceinfo(tr) */
@@ -332,6 +337,13 @@ LJLIB_CF(jit_util_tracek)
       slot = ir->op2;
       slot = ir->op2;
       ir = &T->ir[ir->op1];
       ir = &T->ir[ir->op1];
     }
     }
+#if LJ_HASFFI
+    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);
+    }
+#endif
     lj_ir_kvalue(L, L->top-2, ir);
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     if (slot == -1)
     if (slot == -1)
@@ -416,6 +428,12 @@ LJLIB_CF(jit_util_ircalladdr)
 
 
 #include "lj_libdef.h"
 #include "lj_libdef.h"
 
 
+static int luaopen_jit_util(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_util);
+  return 1;
+}
+
 /* -- jit.opt module ------------------------------------------------------ */
 /* -- jit.opt module ------------------------------------------------------ */
 
 
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -513,6 +531,104 @@ LJLIB_CF(jit_opt_start)
 
 
 #endif
 #endif
 
 
+/* -- jit.profile module -------------------------------------------------- */
+
+#if LJ_HASPROFILE
+
+#define LJLIB_MODULE_jit_profile
+
+/* Not loaded by default, use: local profile = require("jit.profile") */
+
+static const char KEY_PROFILE_THREAD = 't';
+static const char KEY_PROFILE_FUNC = 'f';
+
+static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+				 int vmstate)
+{
+  TValue key;
+  cTValue *tv;
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  tv = lj_tab_get(L, tabV(registry(L)), &key);
+  if (tvisfunc(tv)) {
+    char vmst = (char)vmstate;
+    int status;
+    setfuncV(L2, L2->top++, funcV(tv));
+    setthreadV(L2, L2->top++, L);
+    setintV(L2->top++, samples);
+    setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
+    status = lua_pcall(L2, 3, 0, 0);  /* callback(thread, samples, vmstate) */
+    if (status) {
+      if (G(L2)->panic) G(L2)->panic(L2);
+      exit(EXIT_FAILURE);
+    }
+    lj_trace_abort(G(L2));
+  }
+}
+
+/* profile.start(mode, cb) */
+LJLIB_CF(jit_profile_start)
+{
+  GCtab *registry = tabV(registry(L));
+  GCstr *mode = lj_lib_optstr(L, 1);
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  lua_State *L2 = lua_newthread(L);  /* Thread that runs profiler callback. */
+  TValue key;
+  /* Anchor thread and function in registry. */
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setthreadV(L, lj_tab_set(L, registry, &key), L2);
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setfuncV(L, lj_tab_set(L, registry, &key), func);
+  lj_gc_anybarriert(L, registry);
+  luaJIT_profile_start(L, mode ? strdata(mode) : "",
+		       (luaJIT_profile_callback)jit_profile_callback, L2);
+  return 0;
+}
+
+/* profile.stop() */
+LJLIB_CF(jit_profile_stop)
+{
+  GCtab *registry;
+  TValue key;
+  luaJIT_profile_stop(L);
+  registry = tabV(registry(L));
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setnilV(lj_tab_set(L, registry, &key));
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setnilV(lj_tab_set(L, registry, &key));
+  lj_gc_anybarriert(L, registry);
+  return 0;
+}
+
+/* dump = profile.dumpstack([thread,] fmt, depth) */
+LJLIB_CF(jit_profile_dumpstack)
+{
+  lua_State *L2 = L;
+  int arg = 0;
+  size_t len;
+  int depth;
+  GCstr *fmt;
+  const char *p;
+  if (L->top > L->base && tvisthread(L->base)) {
+    L2 = threadV(L->base);
+    arg = 1;
+  }
+  fmt = lj_lib_checkstr(L, arg+1);
+  depth = lj_lib_checkint(L, arg+2);
+  p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
+  lua_pushlstring(L, p, len);
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+static int luaopen_jit_profile(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_profile);
+  return 1;
+}
+
+#endif
+
 /* -- JIT compiler initialization ----------------------------------------- */
 /* -- JIT compiler initialization ----------------------------------------- */
 
 
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -538,23 +654,17 @@ static uint32_t jit_cpudetect(lua_State *L)
   uint32_t features[4];
   uint32_t features[4];
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
 #if !LJ_HASJIT
 #if !LJ_HASJIT
-#define JIT_F_CMOV	1
 #define JIT_F_SSE2	2
 #define JIT_F_SSE2	2
 #endif
 #endif
-    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
 #if LJ_HASJIT
 #if LJ_HASJIT
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
-      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
-	flags |= JIT_F_P4;  /* Currently unused. */
-      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+      if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
 	flags |= JIT_F_LEA_AGU;
 	flags |= JIT_F_LEA_AGU;
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
       uint32_t fam = (features[0] & 0x0ff00f00);
       uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam == 0x00000f00)  /* K8. */
-	flags |= JIT_F_SPLIT_XMM;
       if (fam >= 0x00000f00)  /* K8, K10. */
       if (fam >= 0x00000f00)  /* K8, K10. */
 	flags |= JIT_F_PREFER_IMUL;
 	flags |= JIT_F_PREFER_IMUL;
     }
     }
@@ -562,14 +672,8 @@ static uint32_t jit_cpudetect(lua_State *L)
   }
   }
   /* Check for required instruction set support on x86 (unnecessary on x64). */
   /* Check for required instruction set support on x86 (unnecessary on x64). */
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
-#if !defined(LUAJIT_CPU_NOCMOV)
-  if (!(flags & JIT_F_CMOV))
-    luaL_error(L, "CPU not supported");
-#endif
-#if defined(LUAJIT_CPU_SSE2)
   if (!(flags & JIT_F_SSE2))
   if (!(flags & JIT_F_SSE2))
-    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-#endif
+    luaL_error(L, "CPU with SSE2 required");
 #endif
 #endif
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -591,6 +695,8 @@ static uint32_t jit_cpudetect(lua_State *L)
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
 #endif
 #endif
+#elif LJ_TARGET_ARM64
+  /* No optional CPU features to detect (for now). */
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #if LJ_HASJIT
 #if LJ_HASJIT
 #if LJ_ARCH_SQRT
 #if LJ_ARCH_SQRT
@@ -600,8 +706,6 @@ static uint32_t jit_cpudetect(lua_State *L)
   flags |= JIT_F_ROUND;
   flags |= JIT_F_ROUND;
 #endif
 #endif
 #endif
 #endif
-#elif LJ_TARGET_PPCSPE
-  /* Nothing to do. */
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 #if LJ_HASJIT
 #if LJ_HASJIT
   /* Compile-time MIPS CPU detection. */
   /* Compile-time MIPS CPU detection. */
@@ -631,11 +735,7 @@ static void jit_init(lua_State *L)
   uint32_t flags = jit_cpudetect(L);
   uint32_t flags = jit_cpudetect(L);
 #if LJ_HASJIT
 #if LJ_HASJIT
   jit_State *J = L2J(L);
   jit_State *J = L2J(L);
-#if LJ_TARGET_X86
-  /* Silently turn off the JIT compiler on CPUs without SSE2. */
-  if ((flags & JIT_F_SSE2))
-#endif
-    J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
   memcpy(J->param, jit_param_default, sizeof(J->param));
   memcpy(J->param, jit_param_default, sizeof(J->param));
   lj_dispatch_update(G(L));
   lj_dispatch_update(G(L));
 #else
 #else
@@ -645,19 +745,23 @@ static void jit_init(lua_State *L)
 
 
 LUALIB_API int luaopen_jit(lua_State *L)
 LUALIB_API int luaopen_jit(lua_State *L)
 {
 {
+  jit_init(L);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushliteral(L, LUAJIT_VERSION);
   lua_pushliteral(L, LUAJIT_VERSION);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#if LJ_HASPROFILE
+  lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
+		tabref(L->env));
+#endif
 #ifndef LUAJIT_DISABLE_JITUTIL
 #ifndef LUAJIT_DISABLE_JITUTIL
-  LJ_LIB_REG(L, "jit.util", jit_util);
+  lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
 #endif
 #endif
 #if LJ_HASJIT
 #if LJ_HASJIT
   LJ_LIB_REG(L, "jit.opt", jit_opt);
   LJ_LIB_REG(L, "jit.opt", jit_opt);
 #endif
 #endif
   L->top -= 2;
   L->top -= 2;
-  jit_init(L);
   return 1;
   return 1;
 }
 }
 
 

+ 5 - 8
Source/ThirdParty/LuaJIT/src/lib_math.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Math library.
 ** Math library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include <math.h>
 #include <math.h>
@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh)		LJLIB_REC(math_htrig IRCALL_tanh)
 LJLIB_ASM_(math_frexp)
 LJLIB_ASM_(math_frexp)
 LJLIB_ASM_(math_modf)		LJLIB_REC(.)
 LJLIB_ASM_(math_modf)		LJLIB_REC(.)
 
 
-LJLIB_PUSH(57.29577951308232)
-LJLIB_ASM_(math_deg)		LJLIB_REC(math_degrad)
-
-LJLIB_PUSH(0.017453292519943295)
-LJLIB_ASM_(math_rad)		LJLIB_REC(math_degrad)
-
 LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 {
 {
   double x = lj_lib_checknum(L, 1);
   double x = lj_lib_checknum(L, 1);
@@ -63,12 +57,15 @@ LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 #else
 #else
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
 #endif
 #endif
-    setnumV(L->base-1, x*y);  /* Do NOT join the expression to x / y. */
+    setnumV(L->base-1-LJ_FR2, x*y);  /* Do NOT join the expression to x / y. */
     return FFH_RES(1);
     return FFH_RES(1);
   }
   }
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
 
 
+LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
+LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
+
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 {
 {
   lj_lib_checknum(L, 1);
   lj_lib_checknum(L, 1);

+ 32 - 20
Source/ThirdParty/LuaJIT/src/lib_os.c

@@ -1,13 +1,12 @@
 /*
 /*
 ** OS library.
 ** OS library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 */
 
 
 #include <errno.h>
 #include <errno.h>
-#include <locale.h>
 #include <time.h>
 #include <time.h>
 
 
 #define lib_os_c
 #define lib_os_c
@@ -18,7 +17,10 @@
 #include "lualib.h"
 #include "lualib.h"
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 #if LJ_TARGET_POSIX
 #if LJ_TARGET_POSIX
@@ -27,13 +29,17 @@
 #include <stdio.h>
 #include <stdio.h>
 #endif
 #endif
 
 
+#if !LJ_TARGET_PSVITA
+#include <locale.h>
+#endif
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_os
 #define LJLIB_MODULE_os
 
 
 LJLIB_CF(os_execute)
 LJLIB_CF(os_execute)
 {
 {
-#if LJ_TARGET_CONSOLE
+#if LJ_NO_SYSTEM
 #if LJ_52
 #if LJ_52
   errno = ENOSYS;
   errno = ENOSYS;
   return luaL_fileresult(L, 0, NULL);
   return luaL_fileresult(L, 0, NULL);
@@ -70,7 +76,7 @@ LJLIB_CF(os_rename)
 
 
 LJLIB_CF(os_tmpname)
 LJLIB_CF(os_tmpname)
 {
 {
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
   lj_err_caller(L, LJ_ERR_OSUNIQF);
   lj_err_caller(L, LJ_ERR_OSUNIQF);
   return 0;
   return 0;
 #else
 #else
@@ -185,7 +191,7 @@ LJLIB_CF(os_date)
 #endif
 #endif
   }
   }
   if (stm == NULL) {  /* Invalid date? */
   if (stm == NULL) {  /* Invalid date? */
-    setnilV(L->top-1);
+    setnilV(L->top++);
   } else if (strcmp(s, "*t") == 0) {
   } else if (strcmp(s, "*t") == 0) {
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     setfield(L, "sec", stm->tm_sec);
     setfield(L, "sec", stm->tm_sec);
@@ -197,23 +203,25 @@ LJLIB_CF(os_date)
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setboolfield(L, "isdst", stm->tm_isdst);
     setboolfield(L, "isdst", stm->tm_isdst);
-  } else {
-    char cc[3];
-    luaL_Buffer b;
-    cc[0] = '%'; cc[2] = '\0';
-    luaL_buffinit(L, &b);
-    for (; *s; s++) {
-      if (*s != '%' || *(s + 1) == '\0') {  /* No conversion specifier? */
-	luaL_addchar(&b, *s);
-      } else {
-	size_t reslen;
-	char buff[200];  /* Should be big enough for any conversion result. */
-	cc[1] = *(++s);
-	reslen = strftime(buff, sizeof(buff), cc, stm);
-	luaL_addlstring(&b, buff, reslen);
+  } else if (*s) {
+    SBuf *sb = &G(L)->tmpbuf;
+    MSize sz = 0;
+    const char *q;
+    for (q = s; *q; q++)
+      sz += (*q == '%') ? 30 : 1;  /* Overflow doesn't matter. */
+    setsbufL(sb, L);
+    for (;;) {
+      char *buf = lj_buf_need(sb, sz);
+      size_t len = strftime(buf, sbufsz(sb), s, stm);
+      if (len) {
+	setstrV(L, L->top++, lj_str_new(L, buf, len));
+	lj_gc_check(L);
+	break;
       }
       }
+      sz += (sz|1);
     }
     }
-    luaL_pushresult(&b);
+  } else {
+    setstrV(L, L->top++, &G(L)->strempty);
   }
   }
   return 1;
   return 1;
 }
 }
@@ -254,6 +262,9 @@ LJLIB_CF(os_difftime)
 
 
 LJLIB_CF(os_setlocale)
 LJLIB_CF(os_setlocale)
 {
 {
+#if LJ_TARGET_PSVITA
+  lua_pushliteral(L, "C");
+#else
   GCstr *s = lj_lib_optstr(L, 1);
   GCstr *s = lj_lib_optstr(L, 1);
   const char *str = s ? strdata(s) : NULL;
   const char *str = s ? strdata(s) : NULL;
   int opt = lj_lib_checkopt(L, 2, 6,
   int opt = lj_lib_checkopt(L, 2, 6,
@@ -265,6 +276,7 @@ LJLIB_CF(os_setlocale)
   else if (opt == 4) opt = LC_MONETARY;
   else if (opt == 4) opt = LC_MONETARY;
   else if (opt == 6) opt = LC_ALL;
   else if (opt == 6) opt = LC_ALL;
   lua_pushstring(L, setlocale(opt, str));
   lua_pushstring(L, setlocale(opt, str));
+#endif
   return 1;
   return 1;
 }
 }
 
 

+ 12 - 4
Source/ThirdParty/LuaJIT/src/lib_package.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Package library.
 ** Package library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -96,9 +96,17 @@ static void setprogdir(lua_State *L)
 static void pusherror(lua_State *L)
 static void pusherror(lua_State *L)
 {
 {
   DWORD error = GetLastError();
   DWORD error = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuffer[128];
+  char buffer[128*2];
+  if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
+      WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
+#else
   char buffer[128];
   char buffer[128];
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
       NULL, error, 0, buffer, sizeof(buffer), NULL))
       NULL, error, 0, buffer, sizeof(buffer), NULL))
+#endif
     lua_pushstring(L, buffer);
     lua_pushstring(L, buffer);
   else
   else
     lua_pushfstring(L, "system error %d\n", error);
     lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +119,7 @@ static void ll_unloadlib(void *lib)
 
 
 static void *ll_load(lua_State *L, const char *path, int gl)
 static void *ll_load(lua_State *L, const char *path, int gl)
 {
 {
-  HINSTANCE lib = LoadLibraryA(path);
+  HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
   if (lib == NULL) pusherror(L);
   if (lib == NULL) pusherror(L);
   UNUSED(gl);
   UNUSED(gl);
   return lib;
   return lib;
@@ -226,7 +234,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       lua_pop(L, 1);
       lua_pop(L, 1);
       if (bcdata) {
       if (bcdata) {
-	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+	if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
 	  return PACKAGE_ERR_LOAD;
 	  return PACKAGE_ERR_LOAD;
 	return 0;
 	return 0;
       }
       }
@@ -383,7 +391,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
   if (lua_isnil(L, -1)) {  /* Not found? */
   if (lua_isnil(L, -1)) {  /* Not found? */
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcdata = ll_bcsym(NULL, bcname);
     const char *bcdata = ll_bcsym(NULL, bcname);
-    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
   }
   }
   return 1;
   return 1;

+ 131 - 319
Source/ThirdParty/LuaJIT/src/lib_string.c

@@ -1,13 +1,11 @@
 /*
 /*
 ** String library.
 ** String library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 */
 
 
-#include <stdio.h>
-
 #define lib_string_c
 #define lib_string_c
 #define LUA_LIB
 #define LUA_LIB
 
 
@@ -18,6 +16,7 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_meta.h"
@@ -25,17 +24,19 @@
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_char.h"
 #include "lj_char.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_string
 #define LJLIB_MODULE_string
 
 
-LJLIB_ASM(string_len)		LJLIB_REC(.)
-{
-  lj_lib_checkstr(L, 1);
-  return FFH_RETRY;
-}
+LJLIB_LUA(string_len) /*
+  function(s)
+    CHECK_str(s)
+    return #s
+  end
+*/
 
 
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 {
 {
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
   lj_state_checkstack(L, (MSize)n);
   lj_state_checkstack(L, (MSize)n);
   p = (const unsigned char *)strdata(s) + start;
   p = (const unsigned char *)strdata(s) + start;
   for (i = 0; i < n; i++)
   for (i = 0; i < n; i++)
-    setintV(L->base + i-1, p[i]);
+    setintV(L->base + i-1-LJ_FR2, p[i]);
   return FFH_RES(n);
   return FFH_RES(n);
 }
 }
 
 
-LJLIB_ASM(string_char)
+LJLIB_ASM(string_char)		LJLIB_REC(.)
 {
 {
   int i, nargs = (int)(L->top - L->base);
   int i, nargs = (int)(L->top - L->base);
-  char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs);
+  char *buf = lj_buf_tmp(L, (MSize)nargs);
   for (i = 1; i <= nargs; i++) {
   for (i = 1; i <= nargs; i++) {
     int32_t k = lj_lib_checkint(L, i);
     int32_t k = lj_lib_checkint(L, i);
     if (!checku8(k))
     if (!checku8(k))
       lj_err_arg(L, i, LJ_ERR_BADVAL);
       lj_err_arg(L, i, LJ_ERR_BADVAL);
     buf[i-1] = (char)k;
     buf[i-1] = (char)k;
   }
   }
-  setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+  setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
   return FFH_RES(1);
   return FFH_RES(1);
 }
 }
 
 
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub)		LJLIB_REC(string_range 1)
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
 
 
-LJLIB_ASM(string_rep)
+LJLIB_CF(string_rep)		LJLIB_REC(.)
 {
 {
   GCstr *s = lj_lib_checkstr(L, 1);
   GCstr *s = lj_lib_checkstr(L, 1);
-  int32_t k = lj_lib_checkint(L, 2);
+  int32_t rep = lj_lib_checkint(L, 2);
   GCstr *sep = lj_lib_optstr(L, 3);
   GCstr *sep = lj_lib_optstr(L, 3);
-  int32_t len = (int32_t)s->len;
-  global_State *g = G(L);
-  int64_t tlen;
-  const char *src;
-  char *buf;
-  if (k <= 0) {
-  empty:
-    setstrV(L, L->base-1, &g->strempty);
-    return FFH_RES(1);
+  SBuf *sb = lj_buf_tmp_(L);
+  if (sep && rep > 1) {
+    GCstr *s2 = lj_buf_cat2str(L, sep, s);
+    lj_buf_reset(sb);
+    lj_buf_putstr(sb, s);
+    s = s2;
+    rep--;
   }
   }
-  if (sep) {
-    tlen = (int64_t)len + sep->len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-    tlen *= k;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  } else {
-    tlen = (int64_t)k * len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  }
-  if (tlen == 0) goto empty;
-  buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
-  src = strdata(s);
-  if (sep) {
-    tlen -= sep->len;  /* Ignore trailing separator. */
-    if (k > 1) {  /* Paste one string and one separator. */
-      int32_t i;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = strdata(sep); len = sep->len;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = g->tmpbuf.buf; len += s->len; k--;  /* Now copy that k-1 times. */
-    }
-  }
-  do {
-    int32_t i = 0;
-    do { *buf++ = src[i++]; } while (i < len);
-  } while (--k > 0);
-  setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
-  return FFH_RES(1);
+  sb = lj_buf_putstr_rep(sb, s, rep);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
 }
 }
 
 
-LJLIB_ASM(string_reverse)
+LJLIB_ASM(string_reverse)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
 {
 {
-  GCstr *s = lj_lib_checkstr(L, 1);
-  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+  lj_lib_checkstr(L, 1);
   return FFH_RETRY;
   return FFH_RETRY;
 }
 }
-LJLIB_ASM_(string_lower)
-LJLIB_ASM_(string_upper)
+LJLIB_ASM_(string_lower)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
+LJLIB_ASM_(string_upper)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
 {
 {
-  luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+  lj_buf_putmem((SBuf *)sb, p, (MSize)size);
   UNUSED(L);
   UNUSED(L);
   return 0;
   return 0;
 }
 }
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
 {
 {
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
-  luaL_Buffer b;
+  SBuf *sb = lj_buf_tmp_(L);  /* Assumes lj_bcwrite() doesn't use tmpbuf. */
   L->top = L->base+1;
   L->top = L->base+1;
-  luaL_buffinit(L, &b);
-  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
     lj_err_caller(L, LJ_ERR_STRDUMP);
     lj_err_caller(L, LJ_ERR_STRDUMP);
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -183,7 +154,6 @@ typedef struct MatchState {
 } MatchState;
 } MatchState;
 
 
 #define L_ESC		'%'
 #define L_ESC		'%'
-#define SPECIALS	"^$*+?.([%-"
 
 
 static int check_capture(MatchState *ms, int l)
 static int check_capture(MatchState *ms, int l)
 {
 {
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
   return s;
   return s;
 }
 }
 
 
-static const char *lmemfind(const char *s1, size_t l1,
-			    const char *s2, size_t l2)
-{
-  if (l2 == 0) {
-    return s1;  /* empty strings are everywhere */
-  } else if (l2 > l1) {
-    return NULL;  /* avoids a negative `l1' */
-  } else {
-    const char *init;  /* to search for a `*s2' inside `s1' */
-    l2--;  /* 1st char will be checked by `memchr' */
-    l1 = l1-l2;  /* `s2' cannot be found after that */
-    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
-      init++;   /* 1st char is already checked */
-      if (memcmp(init, s2+1, l2) == 0) {
-	return init-1;
-      } else {  /* correct `l1' and `s1' to try again */
-	l1 -= (size_t)(init-s1);
-	s1 = init;
-      }
-    }
-    return NULL;  /* not found */
-  }
-}
-
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 {
 {
   if (i >= ms->level) {
   if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
   return nlevels;  /* number of strings pushed */
   return nlevels;  /* number of strings pushed */
 }
 }
 
 
-static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
-{
-  /* relative string position: negative means back from end */
-  if (pos < 0) pos += (ptrdiff_t)len + 1;
-  return (pos >= 0) ? pos : 0;
-}
-
 static int str_find_aux(lua_State *L, int find)
 static int str_find_aux(lua_State *L, int find)
 {
 {
-  size_t l1, l2;
-  const char *s = luaL_checklstring(L, 1, &l1);
-  const char *p = luaL_checklstring(L, 2, &l2);
-  ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
-  if (init < 0) {
-    init = 0;
-  } else if ((size_t)(init) > l1) {
+  GCstr *s = lj_lib_checkstr(L, 1);
+  GCstr *p = lj_lib_checkstr(L, 2);
+  int32_t start = lj_lib_optint(L, 3, 1);
+  MSize st;
+  if (start < 0) start += (int32_t)s->len; else start--;
+  if (start < 0) start = 0;
+  st = (MSize)start;
+  if (st > s->len) {
 #if LJ_52
 #if LJ_52
     setnilV(L->top-1);
     setnilV(L->top-1);
     return 1;
     return 1;
 #else
 #else
-    init = (ptrdiff_t)l1;
+    st = s->len;
 #endif
 #endif
   }
   }
-  if (find && (lua_toboolean(L, 4) ||  /* explicit request? */
-      strpbrk(p, SPECIALS) == NULL)) {  /* or no special characters? */
-    /* do a plain search */
-    const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
-    if (s2) {
-      lua_pushinteger(L, s2-s+1);
-      lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
+	       !lj_str_haspattern(p))) {  /* Search for fixed string. */
+    const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
+    if (q) {
+      setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
+      setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
       return 2;
       return 2;
     }
     }
-  } else {
+  } else {  /* Search for pattern. */
     MatchState ms;
     MatchState ms;
-    int anchor = (*p == '^') ? (p++, 1) : 0;
-    const char *s1=s+init;
+    const char *pstr = strdata(p);
+    const char *sstr = strdata(s) + st;
+    int anchor = 0;
+    if (*pstr == '^') { pstr++; anchor = 1; }
     ms.L = L;
     ms.L = L;
-    ms.src_init = s;
-    ms.src_end = s+l1;
-    do {
-      const char *res;
+    ms.src_init = strdata(s);
+    ms.src_end = strdata(s) + s->len;
+    do {  /* Loop through string and try to match the pattern. */
+      const char *q;
       ms.level = ms.depth = 0;
       ms.level = ms.depth = 0;
-      if ((res=match(&ms, s1, p)) != NULL) {
+      q = match(&ms, sstr, pstr);
+      if (q) {
 	if (find) {
 	if (find) {
-	  lua_pushinteger(L, s1-s+1);  /* start */
-	  lua_pushinteger(L, res-s);   /* end */
-	  return push_captures(&ms, NULL, 0) + 2;
+	  setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
+	  setintV(L->top++, (int32_t)(q-strdata(s)));
+	  return push_captures(&ms, NULL, NULL) + 2;
 	} else {
 	} else {
-	  return push_captures(&ms, s1, res);
+	  return push_captures(&ms, sstr, q);
 	}
 	}
       }
       }
-    } while (s1++ < ms.src_end && !anchor);
+    } while (sstr++ < ms.src_end && !anchor);
   }
   }
-  lua_pushnil(L);  /* not found */
+  setnilV(L->top-1);  /* Not found. */
   return 1;
   return 1;
 }
 }
 
 
-LJLIB_CF(string_find)
+LJLIB_CF(string_find)		LJLIB_REC(.)
 {
 {
   return str_find_aux(L, 1);
   return str_find_aux(L, 1);
 }
 }
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
-/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
-#define MAX_FMTITEM	512
-/* valid flags in a format specification */
-#define FMT_FLAGS	"-+ #0"
-/*
-** maximum size of each format specification (such as '%-099.99d')
-** (+10 accounts for %99.99x plus margin of error)
-*/
-#define MAX_FMTSPEC	(sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
-
-static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
-{
-  GCstr *str = lj_lib_checkstr(L, arg);
-  int32_t len = (int32_t)str->len;
-  const char *s = strdata(str);
-  luaL_addchar(b, '"');
-  while (len--) {
-    uint32_t c = uchar(*s);
-    if (c == '"' || c == '\\' || c == '\n') {
-      luaL_addchar(b, '\\');
-    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
-      uint32_t d;
-      luaL_addchar(b, '\\');
-      if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
-	luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
-	goto tens;
-      } else if (c >= 10) {
-      tens:
-	d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
-      }
-      c += '0';
-    }
-    luaL_addchar(b, c);
-    s++;
-  }
-  luaL_addchar(b, '"');
-}
-
-static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
-{
-  const char *p = strfrmt;
-  while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++;  /* skip flags */
-  if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
-    lj_err_caller(L, LJ_ERR_STRFMTR);
-  if (lj_char_isdigit(uchar(*p))) p++;  /* skip width */
-  if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  if (*p == '.') {
-    p++;
-    if (lj_char_isdigit(uchar(*p))) p++;  /* skip precision */
-    if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  }
-  if (lj_char_isdigit(uchar(*p)))
-    lj_err_caller(L, LJ_ERR_STRFMTW);
-  *(form++) = '%';
-  strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
-  form += p - strfrmt + 1;
-  *form = '\0';
-  return p;
-}
-
-static void addintlen(char *form)
-{
-  size_t l = strlen(form);
-  char spec = form[l - 1];
-  strcpy(form + l - 1, LUA_INTFRMLEN);
-  form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
-  form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
-}
-
-static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (LUA_INTFRM_T)intV(o);
-    else
-      return (LUA_INTFRM_T)numV(o);
-  }
-}
-
-static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (unsigned LUA_INTFRM_T)intV(o);
-    else if ((int32_t)o->u32.hi < 0)
-      return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
-    else
-      return (unsigned LUA_INTFRM_T)numV(o);
-  }
-}
-
-static GCstr *meta_tostring(lua_State *L, int arg)
+/* Emulate tostring() inline. */
+static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
 {
 {
   TValue *o = L->base+arg-1;
   TValue *o = L->base+arg-1;
   cTValue *mo;
   cTValue *mo;
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   if (LJ_LIKELY(tvisstr(o)))
   if (LJ_LIKELY(tvisstr(o)))
     return strV(o);
     return strV(o);
-  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, o);
     copyTV(L, L->top++, o);
     lua_call(L, 1, 1);
     lua_call(L, 1, 1);
-    L->top--;
-    if (tvisstr(L->top))
-      return strV(L->top);
-    o = L->base+arg-1;
-    copyTV(L, o, L->top);
-  }
-  if (tvisnumber(o)) {
-    return lj_str_fromnumber(L, o);
-  } else if (tvisnil(o)) {
-    return lj_str_newlit(L, "nil");
-  } else if (tvisfalse(o)) {
-    return lj_str_newlit(L, "false");
-  } else if (tvistrue(o)) {
-    return lj_str_newlit(L, "true");
-  } else {
-    if (tvisfunc(o) && isffunc(funcV(o)))
-      lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
-    else
-      lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
-    L->top--;
-    return strV(L->top);
+    copyTV(L, L->base+arg-1, --L->top);
+    return NULL;  /* Buffer may be overwritten, retry. */
   }
   }
-}
-
-LJLIB_CF(string_format)
-{
-  int arg = 1, top = (int)(L->top - L->base);
-  GCstr *fmt = lj_lib_checkstr(L, arg);
-  const char *strfrmt = strdata(fmt);
-  const char *strfrmt_end = strfrmt + fmt->len;
-  luaL_Buffer b;
-  luaL_buffinit(L, &b);
-  while (strfrmt < strfrmt_end) {
-    if (*strfrmt != L_ESC) {
-      luaL_addchar(&b, *strfrmt++);
-    } else if (*++strfrmt == L_ESC) {
-      luaL_addchar(&b, *strfrmt++);  /* %% */
-    } else { /* format item */
-      char form[MAX_FMTSPEC];  /* to store the format (`%...') */
-      char buff[MAX_FMTITEM];  /* to store the formatted item */
+  return lj_strfmt_obj(L, o);
+}
+
+LJLIB_CF(string_format)		LJLIB_REC(.)
+{
+  int arg, top = (int)(L->top - L->base);
+  GCstr *fmt;
+  SBuf *sb;
+  FormatState fs;
+  SFormat sf;
+  int retry = 0;
+again:
+  arg = 1;
+  sb = lj_buf_tmp_(L);
+  fmt = lj_lib_checkstr(L, arg);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
+    } else {
       if (++arg > top)
       if (++arg > top)
 	luaL_argerror(L, arg, lj_obj_typename[0]);
 	luaL_argerror(L, arg, lj_obj_typename[0]);
-      strfrmt = scanformat(L, strfrmt, form);
-      switch (*strfrmt++) {
-      case 'c':
-	sprintf(buff, form, lj_lib_checkint(L, arg));
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(L->base+arg-1)) {
+	  int32_t k = intV(L->base+arg-1);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	} else {
+	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+	}
 	break;
 	break;
-      case 'd':  case 'i':
-	addintlen(form);
-	sprintf(buff, form, num2intfrm(L, arg));
+      case STRFMT_UINT:
+	if (tvisint(L->base+arg-1))
+	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+	else
+	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
 	break;
 	break;
-      case 'o':  case 'u':  case 'x':  case 'X':
-	addintlen(form);
-	sprintf(buff, form, num2uintfrm(L, arg));
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
 	break;
 	break;
-      case 'e':  case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
-	TValue tv;
-	tv.n = lj_lib_checknum(L, arg);
-	if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
-	  /* Canonicalize output of non-finite values. */
-	  char *p, nbuf[LJ_STR_NUMBUF];
-	  size_t len = lj_str_bufnum(nbuf, &tv);
-	  if (strfrmt[-1] < 'a') {
-	    nbuf[len-3] = nbuf[len-3] - 0x20;
-	    nbuf[len-2] = nbuf[len-2] - 0x20;
-	    nbuf[len-1] = nbuf[len-1] - 0x20;
-	  }
-	  nbuf[len] = '\0';
-	  for (p = form; *p < 'A' && *p != '.'; p++) ;
-	  *p++ = 's'; *p = '\0';
-	  sprintf(buff, form, nbuf);
-	  break;
-	}
-	sprintf(buff, form, (double)tv.n);
+      case STRFMT_STR: {
+	GCstr *str = string_fmt_tostring(L, arg, retry);
+	if (str == NULL)
+	  retry = 1;
+	else if ((sf & STRFMT_T_QUOTED))
+	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
+	else
+	  lj_strfmt_putfstr(sb, sf, str);
 	break;
 	break;
 	}
 	}
-      case 'q':
-	addquoted(L, &b, arg);
-	continue;
-      case 'p':
-	lj_str_pushf(L, "%p", lua_topointer(L, arg));
-	luaL_addvalue(&b);
-	continue;
-      case 's': {
-	GCstr *str = meta_tostring(L, arg);
-	if (!strchr(form, '.') && str->len >= 100) {
-	  /* no precision and string is too long to be formatted;
-	     keep original string */
-	  setstrV(L, L->top++, str);
-	  luaL_addvalue(&b);
-	  continue;
-	}
-	sprintf(buff, form, strdata(str));
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
 	break;
 	break;
-	}
       default:
       default:
-	lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+	lua_assert(0);
 	break;
 	break;
       }
       }
-      luaL_addlstring(&b, buff, strlen(buff));
     }
     }
   }
   }
-  luaL_pushresult(&b);
+  if (retry++ == 1) goto again;
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 

+ 88 - 81
Source/ThirdParty/LuaJIT/src/lib_table.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Table library.
 ** Table library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -16,57 +16,43 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_ff.h"
 #include "lj_lib.h"
 #include "lj_lib.h"
 
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #define LJLIB_MODULE_table
 #define LJLIB_MODULE_table
 
 
-LJLIB_CF(table_foreachi)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  MSize i, n = lj_tab_len(t);
-  for (i = 1; i <= n; i++) {
-    cTValue *val;
-    setfuncV(L, L->top, func);
-    setintV(L->top+1, i);
-    val = lj_tab_getint(t, (int32_t)i);
-    if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreachi) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for i=1,#t do
+      local r = f(i, t[i])
+      if r ~= nil then return r end
+    end
+  end
+*/
 
 
-LJLIB_CF(table_foreach)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  L->top = L->base+3;
-  setnilV(L->top-1);
-  while (lj_tab_next(L, t, L->top-1)) {
-    copyTV(L, L->top+2, L->top);
-    copyTV(L, L->top+1, L->top-1);
-    setfuncV(L, L->top, func);
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreach) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for k, v in PAIRS(t) do
+      local r = f(k, v)
+      if r ~= nil then return r end
+    end
+  end
+*/
 
 
-LJLIB_ASM(table_getn)		LJLIB_REC(.)
-{
-  lj_lib_checktab(L, 1);
-  return FFH_UNREACHABLE;
-}
+LJLIB_LUA(table_getn) /*
+  function(t)
+    CHECK_tab(t)
+    return #t
+  end
+*/
 
 
 LJLIB_CF(table_maxn)
 LJLIB_CF(table_maxn)
 {
 {
@@ -119,52 +105,47 @@ LJLIB_CF(table_insert)		LJLIB_REC(.)
   return 0;
   return 0;
 }
 }
 
 
-LJLIB_CF(table_remove)		LJLIB_REC(.)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  int32_t e = (int32_t)lj_tab_len(t);
-  int32_t pos = lj_lib_optint(L, 2, e);
-  if (!(1 <= pos && pos <= e))  /* Nothing to remove? */
-    return 0;
-  lua_rawgeti(L, 1, pos);  /* Get previous value. */
-  /* NOBARRIER: This just moves existing elements around. */
-  for (; pos < e; pos++) {
-    cTValue *src = lj_tab_getint(t, pos+1);
-    TValue *dst = lj_tab_setint(L, t, pos);
-    if (src) {
-      copyTV(L, dst, src);
-    } else {
-      setnilV(dst);
-    }
-  }
-  setnilV(lj_tab_setint(L, t, e));  /* Remove (last) value. */
-  return 1;  /* Return previous value. */
-}
+LJLIB_LUA(table_remove) /*
+  function(t, pos)
+    CHECK_tab(t)
+    local len = #t
+    if pos == nil then
+      if len ~= 0 then
+	local old = t[len]
+	t[len] = nil
+	return old
+      end
+    else
+      CHECK_int(pos)
+      if pos >= 1 and pos <= len then
+	local old = t[pos]
+	for i=pos+1,len do
+	  t[i-1] = t[i]
+	end
+	t[len] = nil
+	return old
+      end
+    end
+  end
+*/
 
 
-LJLIB_CF(table_concat)
+LJLIB_CF(table_concat)		LJLIB_REC(.)
 {
 {
-  luaL_Buffer b;
   GCtab *t = lj_lib_checktab(L, 1);
   GCtab *t = lj_lib_checktab(L, 1);
   GCstr *sep = lj_lib_optstr(L, 2);
   GCstr *sep = lj_lib_optstr(L, 2);
-  MSize seplen = sep ? sep->len : 0;
   int32_t i = lj_lib_optint(L, 3, 1);
   int32_t i = lj_lib_optint(L, 3, 1);
   int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
   int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
 	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
 	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
-  luaL_buffinit(L, &b);
-  if (i <= e) {
-    for (;;) {
-      cTValue *o;
-      lua_rawgeti(L, 1, i);
-      o = L->top-1;
-      if (!(tvisstr(o) || tvisnumber(o)))
-	lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
-      luaL_addvalue(&b);
-      if (i++ == e) break;
-      if (seplen)
-	luaL_addlstring(&b, strdata(sep), seplen);
-    }
+  SBuf *sb = lj_buf_tmp_(L);
+  SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+  if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
+    int32_t idx = (int32_t)(intptr_t)sbufP(sb);
+    cTValue *o = lj_tab_getint(t, idx);
+    lj_err_callerv(L, LJ_ERR_TABCAT,
+		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
   }
   }
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sbx));
+  lj_gc_check(L);
   return 1;
   return 1;
 }
 }
 
 
@@ -284,6 +265,30 @@ LJLIB_CF(table_pack)
 }
 }
 #endif
 #endif
 
 
+LJLIB_NOREG LJLIB_CF(table_new)		LJLIB_REC(.)
+{
+  int32_t a = lj_lib_checkint(L, 1);
+  int32_t h = lj_lib_checkint(L, 2);
+  lua_createtable(L, a, h);
+  return 1;
+}
+
+LJLIB_NOREG LJLIB_CF(table_clear)	LJLIB_REC(.)
+{
+  lj_tab_clear(lj_lib_checktab(L, 1));
+  return 0;
+}
+
+static int luaopen_table_new(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
+}
+
+static int luaopen_table_clear(lua_State *L)
+{
+  return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
+}
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
 
 #include "lj_libdef.h"
 #include "lj_libdef.h"
@@ -295,6 +300,8 @@ LUALIB_API int luaopen_table(lua_State *L)
   lua_getglobal(L, "unpack");
   lua_getglobal(L, "unpack");
   lua_setfield(L, -2, "unpack");
   lua_setfield(L, -2, "unpack");
 #endif
 #endif
+  lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+  lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
   return 1;
   return 1;
 }
 }
 
 

+ 15 - 0
Source/ThirdParty/LuaJIT/src/lj.supp

@@ -24,3 +24,18 @@
    Memcheck:Cond
    Memcheck:Cond
    fun:lj_str_new
    fun:lj_str_new
 }
 }
+{
+   Optimized string compare
+   Memcheck:Addr4
+   fun:lj_str_fastcmp
+}
+{
+   Optimized string compare
+   Memcheck:Addr1
+   fun:lj_str_fastcmp
+}
+{
+   Optimized string compare
+   Memcheck:Cond
+   fun:lj_str_fastcmp
+}

+ 17 - 9
Source/ThirdParty/LuaJIT/src/lj_alloc.c

@@ -77,7 +77,7 @@
 #define WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #include <windows.h>
 
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 
 
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
@@ -174,28 +174,36 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
 #endif
 #endif
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 
 
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+#if LJ_64 && !LJ_GC64
+/* 64 bit mode with 32 bit pointers needs special support for allocating
+** memory in the lower 2GB.
+*/
 
 
 #if defined(MAP_32BIT)
 #if defined(MAP_32BIT)
 
 
+#if defined(__sun__)
+#define MMAP_REGION_START	((uintptr_t)0x1000)
+#else
 /* Actually this only gives us max. 1GB in current Linux kernels. */
 /* Actually this only gives us max. 1GB in current Linux kernels. */
+#define MMAP_REGION_START	((uintptr_t)0)
+#endif
+
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
-  void *ptr = mmap(NULL, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+  void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
   errno = olderr;
   errno = olderr;
   return ptr;
   return ptr;
 }
 }
 
 
-#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__sun__)
+#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__)
 
 
 /* OSX and FreeBSD mmap() use a naive first-fit linear search.
 /* OSX and FreeBSD mmap() use a naive first-fit linear search.
 ** That's perfect for us. Except that -pagezero_size must be set for OSX,
 ** That's perfect for us. Except that -pagezero_size must be set for OSX,
 ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
 ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
 ** to be reduced to 250MB on FreeBSD.
 ** to be reduced to 250MB on FreeBSD.
 */
 */
-#if LJ_TARGET_OSX
+#if LJ_TARGET_OSX || defined(__DragonFly__)
 #define MMAP_REGION_START	((uintptr_t)0x10000)
 #define MMAP_REGION_START	((uintptr_t)0x10000)
 #elif LJ_TARGET_PS4
 #elif LJ_TARGET_PS4
 #define MMAP_REGION_START	((uintptr_t)0x4000)
 #define MMAP_REGION_START	((uintptr_t)0x4000)
@@ -232,7 +240,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
       return p;
       return p;
     }
     }
     if (p != CMFAIL) munmap(p, size);
     if (p != CMFAIL) munmap(p, size);
-#ifdef __sun__
+#if defined(__sun__) || defined(__DragonFly__)
     alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
     alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
     if (alloc_hint + size < MMAP_REGION_END) continue;
     if (alloc_hint + size < MMAP_REGION_END) continue;
 #endif
 #endif
@@ -252,7 +260,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
 
 
 #else
 #else
 
 
-/* 32 bit mode is easy. */
+/* 32 bit mode and GC64 mode is easy. */
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
 {
   int olderr = errno;
   int olderr = errno;
@@ -288,7 +296,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE

+ 75 - 62
Source/ThirdParty/LuaJIT/src/lj_api.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Public Lua/C API.
 ** Public Lua/C API.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 **
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -26,6 +26,7 @@
 #include "lj_trace.h"
 #include "lj_trace.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 
 /* -- Common helper functions --------------------------------------------- */
 /* -- Common helper functions --------------------------------------------- */
 
 
@@ -190,7 +191,7 @@ LUA_API int lua_type(lua_State *L, int idx)
   cTValue *o = index2adr(L, idx);
   cTValue *o = index2adr(L, idx);
   if (tvisnumber(o)) {
   if (tvisnumber(o)) {
     return LUA_TNUMBER;
     return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o)) {
   } else if (tvislightud(o)) {
     return LUA_TLIGHTUSERDATA;
     return LUA_TLIGHTUSERDATA;
 #endif
 #endif
@@ -270,7 +271,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     return 0;
     return 0;
   } else if (tvispri(o1)) {
   } else if (tvispri(o1)) {
     return o1 != niltv(L) && o2 != niltv(L);
     return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o1)) {
   } else if (tvislightud(o1)) {
     return o1->u64 == o2->u64;
     return o1->u64 == o2->u64;
 #endif
 #endif
@@ -285,8 +286,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     } else {
     } else {
       L->top = base+2;
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
     }
   }
   }
 }
 }
@@ -308,8 +309,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
     } else {
     } else {
       L->top = base+2;
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
     }
   }
   }
 }
 }
@@ -436,7 +437,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     if (len != NULL) *len = 0;
     if (len != NULL) *len = 0;
@@ -455,7 +456,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -477,7 +478,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
   } else {
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -509,7 +510,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
   } else if (tvisudata(o)) {
   } else if (tvisudata(o)) {
     return udataV(o)->len;
     return udataV(o)->len;
   } else if (tvisnumber(o)) {
   } else if (tvisnumber(o)) {
-    GCstr *s = lj_str_fromnumber(L, o);
+    GCstr *s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     setstrV(L, o, s);
     return s->len;
     return s->len;
   } else {
   } else {
@@ -547,17 +548,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
 
 
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 {
 {
-  cTValue *o = index2adr(L, idx);
-  if (tvisudata(o))
-    return uddata(udataV(o));
-  else if (tvislightud(o))
-    return lightudV(o);
-  else if (tviscdata(o))
-    return cdataptr(cdataV(o));
-  else if (tvisgcv(o))
-    return gcV(o);
-  else
-    return NULL;
+  return lj_obj_ptr(index2adr(L, idx));
 }
 }
 
 
 /* -- Stack setters (object creation) ------------------------------------- */
 /* -- Stack setters (object creation) ------------------------------------- */
@@ -608,7 +599,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
 				     va_list argp)
 				     va_list argp)
 {
 {
   lj_gc_check(L);
   lj_gc_check(L);
-  return lj_str_pushvf(L, fmt, argp);
+  return lj_strfmt_pushvf(L, fmt, argp);
 }
 }
 
 
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -617,7 +608,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
   va_list argp;
   va_list argp;
   lj_gc_check(L);
   lj_gc_check(L);
   va_start(argp, fmt);
   va_start(argp, fmt);
-  ret = lj_str_pushvf(L, fmt, argp);
+  ret = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   va_end(argp);
   return ret;
   return ret;
 }
 }
@@ -651,10 +642,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 
 
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 {
 {
-  GCtab *t;
   lj_gc_check(L);
   lj_gc_check(L);
-  t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
-  settabV(L, L->top, t);
+  settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
   incr_top(L);
   incr_top(L);
 }
 }
 
 
@@ -725,8 +714,8 @@ LUA_API void lua_concat(lua_State *L, int n)
       n -= (int)(L->top - top);
       n -= (int)(L->top - top);
       L->top = top+2;
       L->top = top+2;
       lj_vm_call(L, top, 1+1);
       lj_vm_call(L, top, 1+1);
-      L->top--;
-      copyTV(L, L->top-1, L->top);
+      L->top -= 1+LJ_FR2;
+      copyTV(L, L->top-1, L->top+LJ_FR2);
     } while (--n > 0);
     } while (--n > 0);
   } else if (n == 0) {  /* Push empty string. */
   } else if (n == 0) {  /* Push empty string. */
     setstrV(L, L->top, &G(L)->strempty);
     setstrV(L, L->top, &G(L)->strempty);
@@ -745,8 +734,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
   if (v == NULL) {
   if (v == NULL) {
     L->top += 2;
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   }
   copyTV(L, L->top-1, v);
   copyTV(L, L->top-1, v);
 }
 }
@@ -761,8 +750,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
   if (v == NULL) {
   if (v == NULL) {
     L->top += 2;
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   }
   copyTV(L, L->top, v);
   copyTV(L, L->top, v);
   incr_top(L);
   incr_top(L);
@@ -903,13 +892,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
   o = lj_meta_tset(L, t, L->top-2);
   o = lj_meta_tset(L, t, L->top-2);
   if (o) {
   if (o) {
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top-1);
     L->top -= 2;
     L->top -= 2;
+    copyTV(L, o, L->top+1);
   } else {
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 3;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 3+LJ_FR2;
   }
   }
 }
 }
 
 
@@ -923,14 +913,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
   setstrV(L, &key, lj_str_newz(L, k));
   setstrV(L, &key, lj_str_newz(L, k));
   o = lj_meta_tset(L, t, &key);
   o = lj_meta_tset(L, t, &key);
   if (o) {
   if (o) {
-    L->top--;
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top);
+    copyTV(L, o, --L->top);
   } else {
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 2;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 2+LJ_FR2;
   }
   }
 }
 }
 
 
@@ -1037,11 +1027,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
 
 
 /* -- Calls --------------------------------------------------------------- */
 /* -- Calls --------------------------------------------------------------- */
 
 
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)
+{
+  TValue *o = L->top, *base = o - nargs;
+  L->top = o+1;
+  for (; o > base; o--) copyTV(L, o, o-1);
+  setnilV(o);
+  return o+1;
+}
+#else
+#define api_call_base(L, nargs)	(L->top - (nargs))
+#endif
+
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 {
 {
   api_check(L, L->status == 0 || L->status == LUA_ERRERR);
   api_check(L, L->status == 0 || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
   api_checknelems(L, nargs+1);
-  lj_vm_call(L, L->top - nargs, nresults+1);
+  lj_vm_call(L, api_call_base(L, nargs), nresults+1);
 }
 }
 
 
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1059,7 +1062,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
     api_checkvalidindex(L, o);
     api_checkvalidindex(L, o);
     ef = savestack(L, o);
     ef = savestack(L, o);
   }
   }
-  status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+  status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
   if (status) hook_restore(g, oldh);
   if (status) hook_restore(g, oldh);
   return status;
   return status;
 }
 }
@@ -1067,12 +1070,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 {
 {
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+  TValue *top = L->top;
   fn->c.f = func;
   fn->c.f = func;
-  setfuncV(L, L->top, fn);
-  setlightudV(L->top+1, checklightudptr(L, ud));
+  setfuncV(L, top++, fn);
+  if (LJ_FR2) setnilV(top++);
+  setlightudV(top++, checklightudptr(L, ud));
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
-  L->top += 2;
-  return L->top-1;  /* Now call the newly allocated C function. */
+  L->top = top;
+  return top-1;  /* Now call the newly allocated C function. */
 }
 }
 
 
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1089,10 +1094,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 {
 {
   if (luaL_getmetafield(L, idx, field)) {
   if (luaL_getmetafield(L, idx, field)) {
-    TValue *base = L->top--;
-    copyTV(L, base, index2adr(L, idx));
-    L->top = base+1;
-    lj_vm_call(L, base, 1+1);
+    TValue *top = L->top--;
+    if (LJ_FR2) setnilV(top++);
+    copyTV(L, top++, index2adr(L, idx));
+    L->top = top;
+    lj_vm_call(L, top-1, 1+1);
     return 1;
     return 1;
   }
   }
   return 0;
   return 0;
@@ -1119,12 +1125,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
     } else {  /* Yield from hook: add a pseudo-frame. */
     } else {  /* Yield from hook: add a pseudo-frame. */
       TValue *top = L->top;
       TValue *top = L->top;
       hook_leave(g);
       hook_leave(g);
-      top->u64 = cframe_multres(cf);
-      setcont(top+1, lj_cont_hook);
-      setframe_pc(top+1, cframe_pc(cf)-1);
-      setframe_gc(top+2, obj2gco(L));
-      setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
-      L->top = L->base = top+3;
+      (top++)->u64 = cframe_multres(cf);
+      setcont(top, lj_cont_hook);
+      if (LJ_FR2) top++;
+      setframe_pc(top, cframe_pc(cf)-1);
+      if (LJ_FR2) top++;
+      setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+      setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+      L->top = L->base = top+1;
 #if LJ_TARGET_X64
 #if LJ_TARGET_X64
       lj_err_throw(L, LUA_YIELD);
       lj_err_throw(L, LUA_YIELD);
 #else
 #else
@@ -1141,7 +1149,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
 LUA_API int lua_resume(lua_State *L, int nargs)
 LUA_API int lua_resume(lua_State *L, int nargs)
 {
 {
   if (L->cframe == NULL && L->status <= LUA_YIELD)
   if (L->cframe == NULL && L->status <= LUA_YIELD)
-    return lj_vm_resume(L, L->top - nargs, 0, 0);
+    return lj_vm_resume(L,
+      L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
+      0, 0);
   L->top = L->base;
   L->top = L->base;
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   incr_top(L);
   incr_top(L);
@@ -1171,7 +1181,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.total & 0x3ff);
     res = (int)(g->gc.total & 0x3ff);
     break;
     break;
   case LUA_GCSTEP: {
   case LUA_GCSTEP: {
-    MSize a = (MSize)data << 10;
+    GCSize a = (GCSize)data << 10;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     while (g->gc.total >= g->gc.threshold)
     while (g->gc.total >= g->gc.threshold)
       if (lj_gc_step(L) > 0) {
       if (lj_gc_step(L) > 0) {
@@ -1188,6 +1198,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.stepmul);
     res = (int)(g->gc.stepmul);
     g->gc.stepmul = (MSize)data;
     g->gc.stepmul = (MSize)data;
     break;
     break;
+  case LUA_GCISRUNNING:
+    res = (g->gc.threshold != LJ_MAX_MEM);
+    break;
   default:
   default:
     res = -1;  /* Invalid option. */
     res = -1;  /* Invalid option. */
   }
   }

+ 144 - 50
Source/ThirdParty/LuaJIT/src/lj_arch.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Target architecture selection.
 ** Target architecture selection.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_ARCH_H
 #ifndef _LJ_ARCH_H
@@ -19,10 +19,10 @@
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_arm		3
 #define LUAJIT_ARCH_arm		3
-#define LUAJIT_ARCH_PPC		4
-#define LUAJIT_ARCH_ppc		4
-#define LUAJIT_ARCH_PPCSPE	5
-#define LUAJIT_ARCH_ppcspe	5
+#define LUAJIT_ARCH_ARM64	4
+#define LUAJIT_ARCH_arm64	4
+#define LUAJIT_ARCH_PPC		5
+#define LUAJIT_ARCH_ppc		5
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_mips	6
 #define LUAJIT_ARCH_mips	6
 
 
@@ -43,12 +43,10 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET	LUAJIT_ARCH_PPCSPE
-#else
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
-#endif
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
 #else
 #else
@@ -67,7 +65,8 @@
 #elif defined(__MACH__) && defined(__APPLE__)
 #elif defined(__MACH__) && defined(__APPLE__)
 #define LUAJIT_OS	LUAJIT_OS_OSX
 #define LUAJIT_OS	LUAJIT_OS_OSX
 #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
 #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
-       defined(__NetBSD__) || defined(__OpenBSD__)) && !defined(__ORBIS__)
+       defined(__NetBSD__) || defined(__OpenBSD__) || \
+       defined(__DragonFly__)) && !defined(__ORBIS__)
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
 #elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__)
 #define LUAJIT_OS	LUAJIT_OS_POSIX
 #define LUAJIT_OS	LUAJIT_OS_POSIX
@@ -95,7 +94,7 @@
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS		(LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_IOS		(LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 
 
@@ -111,11 +110,22 @@
 #define NULL ((void*)0)
 #define NULL ((void*)0)
 #endif
 #endif
 
 
+#ifdef __psp2__
+#define LJ_TARGET_PSVITA	1
+#define LJ_TARGET_CONSOLE	1
+#endif
+
 #if _XBOX_VER >= 200
 #if _XBOX_VER >= 200
 #define LJ_TARGET_XBOX360	1
 #define LJ_TARGET_XBOX360	1
 #define LJ_TARGET_CONSOLE	1
 #define LJ_TARGET_CONSOLE	1
 #endif
 #endif
 
 
+#ifdef _DURANGO
+#define LJ_TARGET_XBOXONE	1
+#define LJ_TARGET_CONSOLE	1
+#define LJ_TARGET_GC64		1
+#endif
+
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
@@ -145,7 +155,11 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#define LJ_ABI_WIN		LJ_TARGET_WINDOWS
+#if LJ_TARGET_WINDOWS || __CYGWIN__
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
+#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
 #define LJ_TARGET_EHRETREG	0
@@ -154,6 +168,9 @@
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+#ifdef LUAJIT_ENABLE_GC64
+#define LJ_TARGET_GC64		1
+#endif
 
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 
 
@@ -175,7 +192,9 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
 
-#if __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__
+#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#define LJ_ARCH_VERSION		80
+#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
 #define LJ_ARCH_VERSION		70
 #elif __ARM_ARCH_6T2__
 #elif __ARM_ARCH_6T2__
 #define LJ_ARCH_VERSION		61
 #define LJ_ARCH_VERSION		61
@@ -185,15 +204,45 @@
 #define LJ_ARCH_VERSION		50
 #define LJ_ARCH_VERSION		50
 #endif
 #endif
 
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_NAME		"arm64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#define LJ_TARGET_ARM64		1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+#define LJ_ARCH_NOJIT		1	/* NYI */
+
+#define LJ_ARCH_VERSION		80
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
 
-#define LJ_ARCH_NAME		"ppc"
+#ifndef LJ_ARCH_ENDIAN
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+#endif
+
 #if _LP64
 #if _LP64
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_BITS		64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME		"ppc64le"
+#else
+#define LJ_ARCH_NAME		"ppc64"
+#endif
 #else
 #else
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_BITS		32
+#define LJ_ARCH_NAME		"ppc"
 #endif
 #endif
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
+
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
@@ -202,6 +251,15 @@
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 
 
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64	1
+#define LJ_ARCH_NOFFI		1
+#elif LJ_ARCH_BITS == 64
+#define LJ_ARCH_PPC64		1
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NOJIT		1	/* NYI */
+#endif
+
 #if _ARCH_PWR7
 #if _ARCH_PWR7
 #define LJ_ARCH_VERSION		70
 #define LJ_ARCH_VERSION		70
 #elif _ARCH_PWR6
 #elif _ARCH_PWR6
@@ -215,10 +273,6 @@
 #else
 #else
 #define LJ_ARCH_VERSION		0
 #define LJ_ARCH_VERSION		0
 #endif
 #endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64		1
-#define LJ_ARCH_NOFFI		1
-#endif
 #if _ARCH_PPCSQ
 #if _ARCH_PPCSQ
 #define LJ_ARCH_SQRT		1
 #define LJ_ARCH_SQRT		1
 #endif
 #endif
@@ -232,25 +286,6 @@
 #define LJ_ARCH_XENON		1
 #define LJ_ARCH_XENON		1
 #endif
 #endif
 
 
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#define LJ_ARCH_NAME		"ppcspe"
-#define LJ_ARCH_BITS		32
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP		1
-#endif
-#define LJ_ABI_EABI		1
-#define LJ_TARGET_PPCSPE	1
-#define LJ_TARGET_EHRETREG	3
-#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT	0
-#define LJ_TARGET_MASKROT	1
-#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI		1	/* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT		1
-
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
 
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -269,6 +304,13 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
 
 
+#if !defined(LJ_ARCH_HASFPU) && defined(__mips_soft_float)
+#define LJ_ARCH_HASFPU		0
+#endif
+#if !defined(LJ_ABI_SOFTFP) && defined(__mips_soft_float)
+#define LJ_ABI_SOFTFP		1
+#endif
+
 #if _MIPS_ARCH_MIPS32R2
 #if _MIPS_ARCH_MIPS32R2
 #define LJ_ARCH_VERSION		20
 #define LJ_ARCH_VERSION		20
 #else
 #else
@@ -297,6 +339,16 @@
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #error "Need at least GCC 4.2 or newer"
 #error "Need at least GCC 4.2 or newer"
 #endif
 #endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
 #elif !LJ_TARGET_PS3
 #elif !LJ_TARGET_PS3
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
 #error "Need at least GCC 4.3 or newer"
@@ -320,19 +372,29 @@
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #endif
 #endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+#if defined(__AARCH64EB__)
+#error "No support for big-endian ARM64"
+#endif
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
+#endif
+#elif LJ_TARGET_PPC
 #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
 #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
 #error "No support for PowerPC CPUs without double-precision FPU"
 #error "No support for PowerPC CPUs without double-precision FPU"
 #endif
 #endif
-#if defined(_LITTLE_ENDIAN)
-#error "No support for little-endian PowerPC"
+#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
+#error "No support for little-endian PPC32"
 #endif
 #endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#if LJ_ARCH_PPC64
+#error "No support for PowerPC 64 bit mode (yet)"
+#endif
+#ifdef __NO_FPRS__
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
 #endif
 #endif
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
-#if defined(__mips_soft_float)
-#error "No support for MIPS CPUs without FPU"
+#if defined(_LP64)
+#error "No support for MIPS64"
 #endif
 #endif
 #endif
 #endif
 #endif
 #endif
@@ -358,8 +420,22 @@
 #endif
 #endif
 #endif
 #endif
 
 
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64			1
+#else
+#define LJ_GC64			0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2			1
+#else
+#define LJ_FR2			0
+#endif
+
 /* Disable or enable the JIT compiler. */
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
 #define LJ_HASJIT		0
 #define LJ_HASJIT		0
 #else
 #else
 #define LJ_HASJIT		1
 #define LJ_HASJIT		1
@@ -372,6 +448,21 @@
 #define LJ_HASFFI		1
 #define LJ_HASFFI		1
 #endif
 #endif
 
 
+#if defined(LUAJIT_DISABLE_PROFILE)	/* Build-time opt-out takes precedence over every target check below. */
+#define LJ_HASPROFILE		0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_SIGPROF	1	/* NOTE(review): presumably selects a SIGPROF-driven backend -- confirm. */
+#elif LJ_TARGET_PS3
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_PTHREAD	1	/* NOTE(review): presumably selects a pthread-driven backend -- confirm. */
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_WTHREAD	1	/* NOTE(review): presumably selects a Windows-thread backend -- confirm. */
+#else
+#define LJ_HASPROFILE		0	/* No LJ_PROFILE_* backend is selected for any other target. */
+#endif
+
 #ifndef LJ_ARCH_HASFPU
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #define LJ_ARCH_HASFPU		1
 #endif
 #endif
@@ -404,15 +495,18 @@
 #define LJ_TARGET_UNALIGNED	0
 #define LJ_TARGET_UNALIGNED	0
 #endif
 #endif
 
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #define LUAJIT_NO_LOG2
 #endif
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #define LUAJIT_NO_EXP2
 #endif
 #endif
+#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
+#define LJ_NO_SYSTEM		1
+#endif
 
 
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3
+#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #define LJ_NO_UNWIND		1
 #endif
 #endif
 
 

+ 432 - 68
Source/ThirdParty/LuaJIT/src/lj_asm.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** IR assembler (SSA IR -> machine code).
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_asm_c
 #define lj_asm_c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #error "Missing instruction emitter for target CPU"
 #endif
 #endif
 
 
+/* Generic load/store of register r from/to the stack slot at offset ofs: wrappers over emit_loadofs/emit_storeofs with RID_SP as base. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 /* -- Register allocator debugging ---------------------------------------- */
 
 
 /* #define LUAJIT_DEBUG_RA */
 /* #define LUAJIT_DEBUG_RA */
@@ -336,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
     emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -353,6 +359,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
 {
   int32_t slot = ir->s;
   int32_t slot = ir->s;
+  lua_assert(ir >= as->ir + REF_TRUE);
   if (!ra_hasspill(slot)) {
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
       slot = as->evenspill;
@@ -693,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
 	emit_loadu64(as, dest, ir_kint64(ir)->u64);
 	emit_loadu64(as, dest, ir_kint64(ir)->u64);
 	return;
 	return;
 #endif
 #endif
-      } else {
+      } else if (ir->o != IR_KPRI) {
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
 		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
 	emit_loadi(as, dest, ir->i);
 	emit_loadi(as, dest, ir->i);
@@ -943,44 +950,6 @@ static void asm_snap_prep(ASMState *as)
 
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 static int32_t asm_stack_adjust(ASMState *as)
 {
 {
@@ -1065,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 }
 
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)  /* Assemble an IR_BUFHDR instruction (dispatched from asm_ir). */
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);  /* Result register; used below as the base for SBuf field accesses. */
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {  /* Only when op1 has no register and is not (effectively) adjacent. */
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))  /* Follow op1 links back to the non-append BUFHDR. */
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;  /* From here on ir->op1 is the chain head's constant operand. */
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* Scratch GPR distinct from sb. */
+    /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));  /* Store tmp to sb->p. */
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));  /* Load sb->b into tmp. NOTE(review): presumably code is emitted in reverse, so this load executes first -- confirm. */
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)  /* Assemble IR_BUFPUT as a C call; callee defaults to lj_buf_putstr and is specialized below. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];  /* args[2] is only set (and presumably only consumed) for the lj_buf_putmem case. */
+  IRIns *irs;
+  int kchar = -1;  /* -1 means: no single-char string constant was found. */
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {  /* Operand may be fused and has no register assigned. */
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lua_assert(irt_isinteger(IR(irs->op1)->t));
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {  /* Second argument is a temporary that still has to be filled in. */
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);  /* IRTOSTR_NUM case: argument is derived from the TOSTR operand via asm_tvptr. */
+    else
+      ra_allockreg(as, kchar, tmp);  /* Constant-char case: put the character value into tmp. */
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)  /* Assemble IR_BUFSTR as a one-argument call to lj_buf_tostr. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;  /* NOTE(review): presumably accounts for the string allocated by the callee -- confirm. */
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)  /* Assemble IR_TOSTR; ir->op2 selects lj_strfmt_num, lj_strfmt_int or lj_strfmt_char. */
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;  /* First argument of all three callees. */
+  as->gcsteps++;  /* NOTE(review): presumably accounts for the string allocated by the callee -- confirm. */
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)  /* Only the number case passes a temporary; fill it from op1 via asm_tvptr. */
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)  /* Assemble a CONV+HIOP pair (ir points at the HIOP) as a call to an fp64_* helper. */
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);  /* Source type, taken from the preceding CONV. */
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);  /* Destination type, taken from the preceding CONV. */
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;  /* NOTE(review): presumably LJ_BE/LJ_LE are 0/1, ordering the two operand words by endianness -- confirm. */
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);  /* Call id picked by offset from IRCALL_fp64_d2l. */
+    ir--;  /* For FP sources the result is set up on the CONV instruction instead of the HIOP. */
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);  /* Call id picked by offset from IRCALL_fp64_l2d. */
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;  /* Local copy so the flags can be changed without touching the table. */
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)  /* Assemble IR_NEWREF as a call to lj_tab_newkey(L, t, key). */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)  /* Nothing is emitted when the instruction's register is RID_SINK. */
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);  /* Fill the key temporary from op2 via asm_tvptr. */
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)  /* Assemble IR_LREF: the result register is fed from ASMREF_L. */
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);  /* Result goes into a GPR. */
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions into args[] (n = CCI_XNARGS(ci) entries; REF_NIL is stored as 0). */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }  /* With CCI_L set, the first slot is ASMREF_L and the rest shift by one. */
+  while (n-- > 1) {  /* Fill slots from the last one down to slot 1, following op1 through the CARG chain. */
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;  /* Slot 0 comes from op1 of the last instruction visited. */
+  lua_assert(IR(ir->op1)->o != IR_CARG);  /* The chain must end here. */
+}
+
+/* Reconstruct CCallInfo flags for CALLX*: argument count, optional vararg/calling-convention bits, and the IR type. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;  /* Accumulates the count first, then has flag bits OR-ed in. */
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }  /* One more argument per CARG link. */
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;  /* Type id is the constant referenced by the CARG's op2. */
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));  /* IR type bits are placed at CCI_OTSHIFT. */
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)  /* Assemble ir as a call to lj_ir_callinfo[id] with (op1, op2) as arguments. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)  /* Assemble a CALLN/CALLA/CALLL/CALLS instruction; ir->op2 indexes lj_ir_callinfo. */
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);  /* Gather the argument refs from the CARG chain. */
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)  /* Emit a call to IRCALL_pow with (lref, rref); the result belongs to ir. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)  /* Try to fuse ir with a preceding MUL and LOG2 into one pow() call; returns 1 if fused, else 0. */
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {  /* op1 must be the directly preceding, otherwise unused MUL. */
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {  /* The MUL's op1 must be the unused FPMATH LOG2 right before it. */
+      asm_fppow(as, ir, irpp->op1, irp->op2);  /* pow(LOG2 operand, MUL's other operand). NOTE(review): ir's own opcode is not checked here; presumably the caller does -- confirm. */
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 /* -- PHI and loop handling ----------------------------------------------- */
 
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1338,6 +1560,129 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #error "Missing assembler for target CPU"
 #endif
 #endif
 
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction: dispatch on the IR opcode to the matching asm_* handler. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;  /* No code emitted; only asserts the result is unused. */
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;  /* Allocate a register of the matching class for op1. */
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {  /* Compare consumes the directly preceding HREF. */
+      as->curins--;  /* Step curins back over the HREF, which is assembled together with this compare. */
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;  /* Standalone HREF: no compare opcode to merge (0). */
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;  /* CALLA differs from the other table calls only by this gcsteps bump. */
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;  /* Nothing to emit here; CARG operands are read by asm_collectargs/asm_callx_flags. */
+
+  default:
+    setintV(&as->J->errinfo, ir->o);  /* Record the unhandled opcode number in J->errinfo. */
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);  /* Report the opcode via the LJ_TRERR_NYIIR trace error. */
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 /* -- Head of trace ------------------------------------------------------- */
 
 
 /* Head of a root trace. */
 /* Head of a root trace. */
@@ -1372,6 +1717,11 @@ static void asm_head_side(ASMState *as)
   int pass3 = 0;
   int pass3 = 0;
   IRRef i;
   IRRef i;
 
 
+  if (as->snapno && as->topslot > as->parent->topslot) {
+    /* Force snap #0 alloc to prevent register overwrite in stack check. */
+    as->snapno = 0;
+    asm_snap_alloc(as);
+  }
   allow = asm_head_side_base(as, irp, allow);
   allow = asm_head_side_base(as, irp, allow);
 
 
   /* Scan all parent SLOADs and collect register dependencies. */
   /* Scan all parent SLOADs and collect register dependencies. */
@@ -1563,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
     mres = (int32_t)(snap->nslots - baseslot);
     mres = (int32_t)(snap->nslots - baseslot);
     switch (bc_op(*pc)) {
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1671,7 +2021,7 @@ static void asm_setup_regsp(ASMState *as)
 	as->modset |= RSET_SCRATCH;
 	as->modset |= RSET_SCRATCH;
       continue;
       continue;
       }
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
       if (inloop)
@@ -1716,10 +2066,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
       /* fallthrough */
 #endif
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+	if (ir->op2 != REF_NIL && as->evenspill < 4)
+	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
       if (inloop)
 	as->modset = RSET_SCRATCH;
 	as->modset = RSET_SCRATCH;
@@ -1728,21 +2088,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
       if (inloop)
 	as->modset = RSET_SCRATCH;
 	as->modset = RSET_SCRATCH;
       break;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+	as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
 #endif
     case IR_POW:
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-	ir->prev = REGSP_HINT(RID_XMM0);
 	if (inloop)
 	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+	  as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+	break;
 #else
 #else
 	ir->prev = REGSP_HINT(RID_FPRET);
 	ir->prev = REGSP_HINT(RID_FPRET);
-	if (inloop)
-	  as->modset |= RSET_SCRATCH;
-#endif
 	continue;
 	continue;
+#endif
       }
       }
       /* fallthrough for integer POW */
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
     case IR_DIV: case IR_MOD:
@@ -1755,26 +2120,25 @@ static void asm_setup_regsp(ASMState *as)
       break;
       break;
     case IR_FPMATH:
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-	ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-	if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+	if (!(as->flags & JIT_F_SSE4_1)) {
+	  ir->prev = REGSP_HINT(RID_XMM0);
+	  if (inloop)
+	    as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	  continue;
+	}
+	break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+	if (as->evenspill < 4)  /* Leave room to call pow(). */
 	  as->evenspill = 4;
 	  as->evenspill = 4;
-#endif
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-	continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-	ir->prev = REGSP_HINT(RID_XMM0);
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-	continue;
       }
       }
+#endif
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
       break;
 #else
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-	as->modset |= RSET_SCRATCH;
       continue;
       continue;
 #endif
 #endif
 #if LJ_TARGET_X86ORX64
 #if LJ_TARGET_X86ORX64

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_asm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** IR assembler (SSA IR -> machine code).
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_ASM_H
 #ifndef _LJ_ASM_H

+ 150 - 294
Source/ThirdParty/LuaJIT/src/lj_asm_arm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** ARM IR assembler (SSA IR -> machine code).
 ** ARM IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Register allocator extensions --------------------------------------- */
 /* -- Register allocator extensions --------------------------------------- */
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
   int32_t ofs = 0;
 #if LJ_SOFTFP
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
   Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
   UNUSED(ci);
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -529,6 +520,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
 }
 }
+#else
+#define asm_tobit(as, ir)	lua_assert(0)
 #endif
 #endif
 
 
 static void asm_conv(ASMState *as, IRIns *ir)
 static void asm_conv(ASMState *as, IRIns *ir)
@@ -601,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -714,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       Reg src = ra_alloc1(as, ref, allow);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
     }
     }
-    if ((ir+1)->o == IR_HIOP)
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
       type = ra_alloc1(as, ref+1, allow);
       type = ra_alloc1(as, ref+1, allow);
     else
     else
       type = ra_allock(as, irt_toitype(ir->t), allow);
       type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1106,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   if (ir->r != RID_SINK) {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
     Reg src = ra_alloc1(as, ir->op2,
@@ -1116,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1273,19 +1210,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1307,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
       ofs -= 4; ir--;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1393,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+		RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+				   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 }
+#else
+#define asm_fpmath(as, ir)	lua_assert(0)
 #endif
 #endif
 
 
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1460,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
   asm_intop(as, ir, ai);
 }
 }
 
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1551,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
   asm_intmul(as, ir);
   asm_intmul(as, ir);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_add(as, ir)
+#define asm_subov(as, ir)	asm_sub(as, ir)
+#define asm_mulov(as, ir)	asm_mul(as, ir)
+
+#if LJ_SOFTFP
+#define asm_div(as, ir)		lua_assert(0)
+#define asm_pow(as, ir)		lua_assert(0)
+#define asm_abs(as, ir)		lua_assert(0)
+#define asm_atan2(as, ir)	lua_assert(0)
+#define asm_ldexp(as, ir)	lua_assert(0)
+#else
+#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
 #if !LJ_SOFTFP
 #if !LJ_SOFTFP
@@ -1562,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
   asm_intneg(as, ir, ARMI_RSB);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 }
 
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-		RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-				   (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)	asm_bitop(as, ir, ARMI_MVN)
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1613,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_band(as, ir)	asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)		asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1630,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
   }
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)	lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
 {
   uint32_t kcmp = 0, kmov = 0;
   uint32_t kcmp = 0, kmov = 0;
@@ -1703,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
     asm_intmin_max(as, ir, cc);
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 /* Map of comparisons to flags. ORDER IR. */
 /* Map of comparisons to flags. ORDER IR. */
@@ -1818,6 +1782,18 @@ notst:
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 }
 
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
 static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1892,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
     return;
   }
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1940,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1969,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
 	   (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1977,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
   }
 }
 }
 
 
@@ -2086,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
 
 
 /* -- Head of trace ------------------------------------------------------- */
 /* -- Head of trace ------------------------------------------------------- */
 
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 static void asm_head_lreg(ASMState *as)
 {
 {
   IRIns *ir = IR(ASMREF_L);
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
     ra_evictk(as);
   }
   }
 }
 }
@@ -2163,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
   *p = 0;  /* Prevent load/store merging. */
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR:  asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {

+ 111 - 255
Source/ThirdParty/LuaJIT/src/lj_asm_mips.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** MIPS IR assembler (SSA IR -> machine code).
 ** MIPS IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Register allocator extensions --------------------------------------- */
 /* -- Register allocator extensions --------------------------------------- */
@@ -226,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 16;
   int32_t ofs = 16;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
   if ((void *)ci->func)
@@ -326,15 +326,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
   }
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
@@ -362,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
   asm_gencall(as, &ci, args);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
 static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
 {
 {
   /* The modified regs must match with the *.dasc implementation. */
   /* The modified regs must match with the *.dasc implementation. */
@@ -391,7 +372,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -520,28 +501,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[LJ_BE?0:1] = ir->op1;
-  args[LJ_BE?1:0] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -558,6 +517,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	   RID_SP, sps_scale(ir->s));
 	   RID_SP, sps_scale(ir->s));
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -581,27 +542,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -631,7 +571,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 **   return niltv(L);
 */
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
 {
   RegSet allow = RSET_GPR;
   RegSet allow = RSET_GPR;
   int destused = ra_used(ir);
   int destused = ra_used(ir);
@@ -657,37 +597,42 @@ static void asm_href(ASMState *as, IRIns *ir)
   tmp2 = ra_scratch(as, allow);
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
   rset_clear(allow, tmp2);
 
 
-  /* Key not found in chain: load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
   l_end = emit_label(as);
-  if (destused)
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  else
-    *--as->mcp = MIPSI_NOP;
   /* Follow hash chain until the end. */
   /* Follow hash chain until the end. */
-  emit_move(as, dest, tmp1);
+  emit_move(as, dest, tmp2);
   l_loop = --as->mcp;
   l_loop = --as->mcp;
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
   l_next = emit_label(as);
   l_next = emit_label(as);
 
 
   /* Type and value comparison. */
   /* Type and value comparison. */
+  if (merge == IR_EQ) {  /* Must match asm_guard(). */
+    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+    l_end = asm_exitstub_addr(as);
+  }
   if (irt_isnum(kt)) {
   if (irt_isnum(kt)) {
     emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
     emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
     emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
     emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
-	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
-    emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
-    emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
+    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
+    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
     emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
     emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
   } else {
   } else {
     if (irt_ispri(kt)) {
     if (irt_ispri(kt)) {
-      emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
+      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
     } else {
     } else {
-      emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
-      emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
-      emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
+      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
+      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
+      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
     }
     }
   }
   }
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
-  *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
+  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
 
 
   /* Load main position relative to tab->node into dest. */
   /* Load main position relative to tab->node into dest. */
   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -777,20 +722,6 @@ nolo:
     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  if (ir->r != RID_SINK) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-    IRRef args[3];
-    args[0] = ASMREF_L;     /* lua_State *L */
-    args[1] = ir->op1;      /* GCtab *t     */
-    args[2] = ASMREF_TMP1;  /* cTValue *key */
-    asm_setupresult(as, ir, ci);  /* TValue * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -919,7 +850,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   if (ir->r != RID_SINK) {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
     Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -928,6 +859,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   IRType1 t = ir->t;
   IRType1 t = ir->t;
@@ -1003,7 +936,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
       if (irt_isint(t)) {
       if (irt_isint(t)) {
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
-	emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+	emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
 	dest = tmp;
 	dest = tmp;
 	t.irt = IRT_NUM;  /* Check for original type. */
 	t.irt = IRT_NUM;  /* Check for original type. */
       } else {
       } else {
@@ -1043,19 +976,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1064,6 +993,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
 
   /* Initialize immutable cdata object. */
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
     if (sz == 8) {
@@ -1078,12 +1008,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; if (LJ_BE) ir++; else ir--;
       ofs -= 4; if (LJ_BE) ir++; else ir--;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1153,23 +1095,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
   emit_fg(as, mi, dest, left);
   emit_fg(as, mi, dest, left);
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, MIPSI_SQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 }
 
 
 static void asm_add(ASMState *as, IRIns *ir)
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1215,6 +1150,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_div(as, ir)		asm_fparith(as, ir, MIPSI_DIV_D)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
@@ -1226,6 +1165,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, MIPSI_ABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir)
 static void asm_arithov(ASMState *as, IRIns *ir)
 {
 {
   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1259,13 +1202,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
     emit_move(as, RID_TMP, dest == left ? left : right);
     emit_move(as, RID_TMP, dest == left ? left : right);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_arithov(as, ir)
+#define asm_subov(as, ir)	asm_arithov(as, ir)
+
 static void asm_mulov(ASMState *as, IRIns *ir)
 static void asm_mulov(ASMState *as, IRIns *ir)
 {
 {
-#if LJ_DUALNUM
-#error "NYI: MULOV"
-#else
-  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused in single-number mode. */
-#endif
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+						 right), dest));
+  asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
+  emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
+  emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
+  emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+  emit_dst(as, MIPSI_MULT, 0, left, right);
 }
 }
 
 
 #if LJ_HASFFI
 #if LJ_HASFFI
@@ -1352,7 +1303,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 }
 #endif
 #endif
 
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
 {
   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irl = IR(ir->op1);
   IRIns *irl = IR(ir->op1);
@@ -1366,7 +1317,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
   emit_dst(as, MIPSI_NOR, dest, left, right);
   emit_dst(as, MIPSI_NOR, dest, left, right);
 }
 }
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1402,6 +1353,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   emit_dst(as, mi, dest, left, right);
   emit_dst(as, mi, dest, left, right);
 }
 }
 
 
+#define asm_band(as, ir)	asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
+#define asm_bor(as, ir)		asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1415,7 +1370,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   }
   }
 }
 }
 
 
-static void asm_bitror(ASMState *as, IRIns *ir)
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
+#define asm_brol(as, ir)	lua_assert(0)
+
+static void asm_bror(ASMState *as, IRIns *ir)
 {
 {
   if ((as->flags & JIT_F_MIPS32R2)) {
   if ((as->flags & JIT_F_MIPS32R2)) {
     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1464,6 +1424,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
   }
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 static void asm_comp(ASMState *as, IRIns *ir)
 static void asm_comp(ASMState *as, IRIns *ir)
@@ -1501,7 +1464,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-static void asm_compeq(ASMState *as, IRIns *ir)
+static void asm_equal(ASMState *as, IRIns *ir)
 {
 {
   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
   right = (left >> 8); left &= 255;
   right = (left >> 8); left &= 255;
@@ -1575,8 +1538,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, LJ_LE ? 4 : 0);
-      asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+      asm_xstore_(as, ir, LJ_LE ? 4 : 0);
+      asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
     }
     }
     return;
     return;
   }
   }
@@ -1600,6 +1563,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, MIPSI_LBU, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1624,7 +1598,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
 }
 }
@@ -1772,131 +1746,13 @@ static void asm_tail_prep(ASMState *as)
   as->invmcp = as->loopref ? as->mcp : NULL;
   as->invmcp = as->loopref ? as->mcp : NULL;
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
-  case IR_BOR:  asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
-  case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
-  case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
-  case IR_BROL: lua_assert(0); break;
-  case IR_BROR: asm_bitror(as, ir); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, MIPSI_SQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir); break;
-  case IR_SUBOV: asm_arithov(as, ir); break;
-  case IR_MULOV: asm_mulov(as, ir); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
   for (i = 0; i < nargs; i++) {

+ 113 - 265
Source/ThirdParty/LuaJIT/src/lj_asm_ppc.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** PPC IR assembler (SSA IR -> machine code).
 ** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Register allocator extensions --------------------------------------- */
 /* -- Register allocator extensions --------------------------------------- */
@@ -49,6 +49,8 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
 {
 {
   ExitNo i;
   ExitNo i;
   MCode *mxp = as->mctop;
   MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
   /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
   /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
   for (i = nexits-1; (int32_t)i >= 0; i--)
   for (i = nexits-1; (int32_t)i >= 0; i--)
     *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
     *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
@@ -249,7 +251,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
   int32_t ofs = 8;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
   if ((void *)ci->func)
@@ -321,23 +323,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
       } else {
 	ra_destreg(as, ir, RID_FPRET);
 	ra_destreg(as, ir, RID_FPRET);
       }
       }
+#if LJ_32
     } else if (hiop) {
     } else if (hiop) {
       ra_destpair(as, ir);
       ra_destpair(as, ir);
+#endif
     } else {
     } else {
       ra_destreg(as, ir, RID_RET);
       ra_destreg(as, ir, RID_RET);
     }
     }
   }
   }
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_callx(ASMState *as, IRIns *ir)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
@@ -350,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
   func = ir->op2; irf = IR(func);
   func = ir->op2; irf = IR(func);
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irref_isk(func)) {  /* Call to constant address. */
   if (irref_isk(func)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(irf->i);
+    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
   } else {  /* Need a non-argument register for indirect calls. */
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     Reg freg = ra_alloc1(as, func, allow);
     Reg freg = ra_alloc1(as, func, allow);
@@ -361,16 +356,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
   asm_gencall(as, &ci, args);
 }
 }
 
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 /* -- Returns ------------------------------------------------------------- */
 
 
 /* Return to lower frame. Guard that it goes to the right spot. */
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -378,7 +363,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -511,28 +496,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -551,6 +514,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 }
 
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
@@ -564,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     /* Otherwise use g->tmptv to hold the TValue. */
     /* Otherwise use g->tmptv to hold the TValue. */
     RegSet allow = rset_exclude(RSET_GPR, dest);
     RegSet allow = rset_exclude(RSET_GPR, dest);
     Reg type;
     Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
     if (!irt_ispri(ir->t)) {
     if (!irt_ispri(ir->t)) {
       Reg src = ra_alloc1(as, ref, allow);
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
       emit_setgl(as, src, tmptv.gcr);
@@ -574,27 +539,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
   }
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -771,20 +715,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -915,7 +845,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 }
 
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
 {
   IRIns *irb;
   IRIns *irb;
   if (ir->r == RID_SINK)
   if (ir->r == RID_SINK)
@@ -932,6 +862,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
   }
 }
 }
 
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
 {
   IRType1 t = ir->t;
   IRType1 t = ir->t;
@@ -1082,19 +1014,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
   ra_evictset(as, drop);
@@ -1103,6 +1031,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
 
   /* Initialize immutable cdata object. */
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
     if (sz == 8) {
@@ -1116,12 +1045,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir++;
       ofs -= 4; ir++;
     }
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1195,23 +1136,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
   emit_fb(as, pi, dest, left);
   emit_fb(as, pi, dest, left);
 }
 }
 
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+    asm_fpunary(as, ir, PPCI_FSQRT);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 }
 
 
 static void asm_add(ASMState *as, IRIns *ir)
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1311,6 +1243,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 {
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
@@ -1329,6 +1265,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {
 {
   Reg dest, left, right;
   Reg dest, left, right;
@@ -1344,6 +1284,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
 }
 }
 
 
+#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
+#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
+#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)
+
 #if LJ_HASFFI
 #if LJ_HASFFI
 static void asm_add64(ASMState *as, IRIns *ir)
 static void asm_add64(ASMState *as, IRIns *ir)
 {
 {
@@ -1423,7 +1367,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 }
 #endif
 #endif
 
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
 {
   Reg dest, left, right;
   Reg dest, left, right;
   PPCIns pi = PPCI_NOR;
   PPCIns pi = PPCI_NOR;
@@ -1450,7 +1394,7 @@ nofuse:
   emit_asb(as, pi, dest, left, right);
   emit_asb(as, pi, dest, left, right);
 }
 }
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irx;
   IRIns *irx;
@@ -1471,32 +1415,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-  if (irref_isk(ir->op2)) {
-    int32_t k = IR(ir->op2)->i;
-    Reg tmp = left;
-    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
-      if (!checku16(k)) {
-	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
-	if ((k & 0xffff) == 0) return;
-      }
-      emit_asi(as, pik, dest, left, k);
-      return;
-    }
-  }
-  /* May fail due to spills/restores above, but simplifies the logic. */
-  if (as->flagmcp == as->mcp) {
-    as->flagmcp = NULL;
-    as->mcp++;
-    pi |= PPCF_DOT;
-  }
-  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
-  emit_asb(as, pi, dest, left, right);
-}
-
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 {
 {
@@ -1527,7 +1445,7 @@ nofuse:
   *--as->mcp = pi | PPCF_T(left);
   *--as->mcp = pi | PPCF_T(left);
 }
 }
 
 
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
 {
 {
   Reg dest, left, right;
   Reg dest, left, right;
   IRRef lref = ir->op1;
   IRRef lref = ir->op1;
@@ -1582,6 +1500,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
 }
 }
 
 
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+      if (!checku16(k)) {
+	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+	if ((k & 0xffff) == 0) return;
+      }
+      emit_asi(as, pik, dest, left, k);
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 {
 {
   Reg dest, left;
   Reg dest, left;
@@ -1607,6 +1554,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
   }
   }
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
+#define asm_brol(as, ir) \
+  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+#define asm_bror(as, ir)	lua_assert(0)
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
 {
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
@@ -1637,6 +1592,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
   }
 }
 }
 
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
@@ -1713,6 +1671,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1758,8 +1718,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, 0);
-      asm_xstore(as, ir-1, 4);
+      asm_xstore_(as, ir, 0);
+      asm_xstore_(as, ir-1, 4);
     }
     }
     return;
     return;
   }
   }
@@ -1783,6 +1743,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1804,7 +1775,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
 }
 }
@@ -1965,136 +1936,13 @@ static void asm_tail_prep(ASMState *as)
   }
   }
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitand(as, ir); break;
-  case IR_BOR:  asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
-  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
-  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
-  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
-			     PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
-  case IR_BROR: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
-      asm_fpunary(as, ir, PPCI_FSQRT);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
-  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
-  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
 {
   IRRef args[CCI_NARGS_MAX*2];
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)
   for (i = 0; i < nargs; i++)

+ 219 - 379
Source/ThirdParty/LuaJIT/src/lj_asm_x86.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** x86/x64 IR assembler (SSA IR -> machine code).
 ** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Guard handling ------------------------------------------------------ */
 /* -- Guard handling ------------------------------------------------------ */
@@ -325,6 +325,14 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       as->mrm.base = as->mrm.idx = RID_NONE;
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
       return RID_MRM;
     }
     }
+  } else if (ir->o == IR_KINT64) {
+    RegSet avail = as->freeset & ~as->modset & RSET_GPR;
+    lua_assert(allow != RSET_EMPTY);
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
+      as->mrm.ofs = ptr2addr(ir_kint64(ir));
+      as->mrm.base = as->mrm.idx = RID_NONE;
+      return RID_MRM;
+    }
   } else if (mayfuse(as, ref)) {
   } else if (mayfuse(as, ref)) {
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
     if (ir->o == IR_SLOAD) {
@@ -361,7 +369,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
       return RID_MRM;
     }
     }
   }
   }
-  if (!(as->freeset & allow) &&
+  if (!(as->freeset & allow) && !irref_isk(ref) &&
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
     goto fusespill;
     goto fusespill;
   return ra_allocref(as, ref, allow);
   return ra_allocref(as, ref, allow);
@@ -384,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
   int nslots = 0;
 #if LJ_64
 #if LJ_64
   if (LJ_ABI_WIN) {
   if (LJ_ABI_WIN) {
@@ -417,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
   uint32_t gprs = REGARG_GPRS;
@@ -552,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       if (ra_hasreg(dest)) {
       if (ra_hasreg(dest)) {
 	ra_free(as, dest);
 	ra_free(as, dest);
 	ra_modified(as, dest);
 	ra_modified(as, dest);
-	emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
 		  dest, RID_ESP, ofs);
 		  dest, RID_ESP, ofs);
       }
       }
       if ((ci->flags & CCI_CASTU64)) {
       if ((ci->flags & CCI_CASTU64)) {
@@ -571,20 +579,11 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       lua_assert(!irt_ispri(ir->t));
       lua_assert(!irt_ispri(ir->t));
       ra_destreg(as, ir, RID_RET);
       ra_destreg(as, ir, RID_RET);
     }
     }
-  } else if (LJ_32 && irt_isfp(ir->t)) {
+  } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
     emit_x87op(as, XI_FPOP);  /* Pop unused result from x87 st0. */
     emit_x87op(as, XI_FPOP);  /* Pop unused result from x87 st0. */
   }
   }
 }
 }
 
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
 {
@@ -644,7 +643,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -664,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
   /* Can't fuse since left is needed twice. */
 }
 }
@@ -721,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
       /* Checked conversions are only supported from number to int. */
@@ -730,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -826,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_free(as, dest);
     ra_modified(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-	      dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -855,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -898,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
 	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	   asm_fuseload(as, ir->op1, RSET_EMPTY));
 	   asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 #endif
 
 
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -919,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	    RID_ESP, sps_scale(ir->s));
 	    RID_ESP, sps_scale(ir->s));
 }
 }
 
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-	      RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
   }
 }
 }
 
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 static void asm_aref(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -952,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
     emit_rr(as, XO_MOV, dest, as->mrm.base);
     emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 }
 
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
 **   Node *n = hashkey(t, key);
@@ -977,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 **   return niltv(L);
 */
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -997,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
       tmp = ra_scratch(as, rset_exclude(allow, key));
       tmp = ra_scratch(as, rset_exclude(allow, key));
   }
   }
 
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
 
   /* Follow hash chain until the end. */
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1013,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
   l_next = emit_label(as);
 
 
   /* Type and value comparison. */
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
   else
     emit_sjcc(as, CC_E, l_end);
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
   if (irt_isnum(kt)) {
@@ -1170,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1264,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
   default:
     if (LJ_64 && irt_is64(ir->t))
     if (LJ_64 && irt_is64(ir->t))
@@ -1277,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
   emit_mrm(as, xo, dest, RID_MRM);
 }
 }
 
 
+#define asm_fload(as, ir)	asm_fxload(as, ir)
+#define asm_xload(as, ir)	asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
 {
   RegSet allow = RSET_GPR;
   RegSet allow = RSET_GPR;
@@ -1340,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
   }
 }
 }
 
 
+#define asm_fstore(as, ir)	asm_fxstore(as, ir)
+#define asm_xstore(as, ir)	asm_fxstore(as, ir)
+
 #if LJ_64
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
 {
@@ -1378,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
   }
@@ -1444,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
 #if LJ_64
   } else if (irt_islightud(t)) {
   } else if (irt_islightud(t)) {
@@ -1462,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
     }
   } else {
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1493,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
 
@@ -1544,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
     } while (1);
 #endif
 #endif
     lua_assert(sz == 4 || sz == 8);
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
   }
 
 
   /* Combine initialization of marked, gct and ctypeid. */
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+	   (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
 }
@@ -1630,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
   }
 }
 }
 
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-	rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1690,53 +1626,31 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
       ra_left(as, RID_XMM0, ir->op1);
     }
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-	emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-	emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
   }
 }
 }
 
 
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
+  asm_x87load(as, ir->op1);
+  asm_x87load(as, ir->op2);
+}
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
 {
   /* The modified regs must match with the *.dasc implementation. */
   /* The modified regs must match with the *.dasc implementation. */
@@ -1750,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
   ra_left(as, RID_EAX, ir->op2);
 }
 }
 
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
 #endif
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 }
 
 
 static int asm_swapops(ASMState *as, IRIns *ir)
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1828,8 +1731,12 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   Reg dest, right;
   Reg dest, right;
   int32_t k = 0;
   int32_t k = 0;
   if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
   if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
-    as->flagmcp = NULL;
-    as->mcp += (LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2;
+    MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
+    if ((p[1] & 15) < 14) {
+      if ((p[1] & 15) >= 12) p[1] -= 4;  /* L <->S, NL <-> NS */
+      as->flagmcp = NULL;
+      as->mcp = p;
+    }  /* else: cannot transform LE/NLE to cc without use of OF. */
   }
   }
   right = IR(rref)->r;
   right = IR(rref)->r;
   if (ra_hasreg(right)) {
   if (ra_hasreg(right)) {
@@ -1948,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
     asm_intarith(as, ir, XOg_ADD);
 }
 }
 
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1955,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
   ra_left(as, dest, ir->op1);
 }
 }
 
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)		asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1966,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
   ra_left(as, dest, lref);
 }
 }
 
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)	asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)	asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)	asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)	asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1974,6 +1952,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
   ra_left(as, dest, ir->op1);
 }
 }
 
 
+#define asm_band(as, ir)	asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)		asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)	asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
 {
   IRRef rref = ir->op2;
   IRRef rref = ir->op2;
@@ -2013,6 +1995,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
   */
 }
 }
 
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir)	asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 /* -- Comparisons --------------------------------------------------------- */
 
 
 /* Virtual flags for unordered FP comparisons. */
 /* Virtual flags for unordered FP comparisons. */
@@ -2039,8 +2027,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 };
 
 
 /* FP and integer comparisons. */
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
     IRRef rref = ir->op2;
@@ -2195,6 +2184,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
   }
 }
 }
 
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2277,13 +2268,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-	asm_conv_fp_int64(as, ir);
-      else
-	asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
     asm_comp_int64(as, ir);
@@ -2332,6 +2319,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 #endif
 }
 }
 
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 /* -- Stack handling ------------------------------------------------------ */
 
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2353,7 +2350,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	      ptr2addr(&J2G(as->J)->jit_base));
 	      ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
 }
@@ -2581,163 +2578,6 @@ static void asm_tail_prep(ASMState *as)
   }
   }
 }
 }
 
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR:  asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 /* -- Trace setup --------------------------------------------------------- */
 
 
 /* Ensure there are enough stack slots for call arguments. */
 /* Ensure there are enough stack slots for call arguments. */

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_bc.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bytecode instruction modes.
 ** Bytecode instruction modes.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_bc_c
 #define lj_bc_c

+ 5 - 1
Source/ThirdParty/LuaJIT/src/lj_bc.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bytecode instruction format.
 ** Bytecode instruction format.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_BC_H
 #ifndef _LJ_BC_H
@@ -89,6 +89,8 @@
   _(ISFC,	dst,	___,	var,	___) \
   _(ISFC,	dst,	___,	var,	___) \
   _(IST,	___,	___,	var,	___) \
   _(IST,	___,	___,	var,	___) \
   _(ISF,	___,	___,	var,	___) \
   _(ISF,	___,	___,	var,	___) \
+  _(ISTYPE,	var,	___,	lit,	___) \
+  _(ISNUM,	var,	___,	lit,	___) \
   \
   \
   /* Unary ops. */ \
   /* Unary ops. */ \
   _(MOV,	dst,	___,	var,	___) \
   _(MOV,	dst,	___,	var,	___) \
@@ -143,10 +145,12 @@
   _(TGETV,	dst,	var,	var,	index) \
   _(TGETV,	dst,	var,	var,	index) \
   _(TGETS,	dst,	var,	str,	index) \
   _(TGETS,	dst,	var,	str,	index) \
   _(TGETB,	dst,	var,	lit,	index) \
   _(TGETB,	dst,	var,	lit,	index) \
+  _(TGETR,	dst,	var,	var,	index) \
   _(TSETV,	var,	var,	var,	newindex) \
   _(TSETV,	var,	var,	var,	newindex) \
   _(TSETS,	var,	var,	str,	newindex) \
   _(TSETS,	var,	var,	str,	newindex) \
   _(TSETB,	var,	var,	lit,	newindex) \
   _(TSETB,	var,	var,	lit,	newindex) \
   _(TSETM,	base,	___,	num,	newindex) \
   _(TSETM,	base,	___,	num,	newindex) \
+  _(TSETR,	var,	var,	var,	newindex) \
   \
   \
   /* Calls and vararg handling. T = tail call. */ \
   /* Calls and vararg handling. T = tail call. */ \
   _(CALLM,	base,	lit,	lit,	call) \
   _(CALLM,	base,	lit,	lit,	call) \

+ 5 - 3
Source/ThirdParty/LuaJIT/src/lj_bcdump.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bytecode dump definitions.
 ** Bytecode dump definitions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_BCDUMP_H
 #ifndef _LJ_BCDUMP_H
@@ -36,14 +36,15 @@
 /* If you perform *any* kind of private modifications to the bytecode itself
 /* If you perform *any* kind of private modifications to the bytecode itself
 ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
 ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
 */
 */
-#define BCDUMP_VERSION		1
+#define BCDUMP_VERSION		2
 
 
 /* Compatibility flags. */
 /* Compatibility flags. */
 #define BCDUMP_F_BE		0x01
 #define BCDUMP_F_BE		0x01
 #define BCDUMP_F_STRIP		0x02
 #define BCDUMP_F_STRIP		0x02
 #define BCDUMP_F_FFI		0x04
 #define BCDUMP_F_FFI		0x04
+#define BCDUMP_F_FR2		0x08
 
 
-#define BCDUMP_F_KNOWN		(BCDUMP_F_FFI*2-1)
+#define BCDUMP_F_KNOWN		(BCDUMP_F_FR2*2-1)
 
 
 /* Type codes for the GC constants of a prototype. Plus length for strings. */
 /* Type codes for the GC constants of a prototype. Plus length for strings. */
 enum {
 enum {
@@ -61,6 +62,7 @@ enum {
 
 
 LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
 LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
 		       void *data, int strip);
 		       void *data, int strip);
+LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
 LJ_FUNC GCproto *lj_bcread(LexState *ls);
 LJ_FUNC GCproto *lj_bcread(LexState *ls);
 
 
 #endif
 #endif

+ 63 - 82
Source/ThirdParty/LuaJIT/src/lj_bcread.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bytecode reader.
 ** Bytecode reader.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_bcread_c
 #define lj_bcread_c
@@ -9,6 +9,7 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
@@ -20,6 +21,7 @@
 #include "lj_lex.h"
 #include "lj_lex.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_state.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 
 
 /* Reuse some lexer fields for our own purposes. */
 /* Reuse some lexer fields for our own purposes. */
 #define bcread_flags(ls)	ls->level
 #define bcread_flags(ls)	ls->level
@@ -38,84 +40,73 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
   const char *name = ls->chunkarg;
   const char *name = ls->chunkarg;
   if (*name == BCDUMP_HEAD1) name = "(binary)";
   if (*name == BCDUMP_HEAD1) name = "(binary)";
   else if (*name == '@' || *name == '=') name++;
   else if (*name == '@' || *name == '=') name++;
-  lj_str_pushf(L, "%s: %s", name, err2msg(em));
+  lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
   lj_err_throw(L, LUA_ERRSYNTAX);
   lj_err_throw(L, LUA_ERRSYNTAX);
 }
 }
 
 
-/* Resize input buffer. */
-static void bcread_resize(LexState *ls, MSize len)
-{
-  if (ls->sb.sz < len) {
-    MSize sz = ls->sb.sz * 2;
-    while (len > sz) sz = sz * 2;
-    lj_str_resizebuf(ls->L, &ls->sb, sz);
-    /* Caveat: this may change ls->sb.buf which may affect ls->p. */
-  }
-}
-
-/* Refill buffer if needed. */
+/* Refill buffer. */
 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
 {
 {
   lua_assert(len != 0);
   lua_assert(len != 0);
-  if (len > LJ_MAX_MEM || ls->current < 0)
+  if (len > LJ_MAX_BUF || ls->c < 0)
     bcread_error(ls, LJ_ERR_BCBAD);
     bcread_error(ls, LJ_ERR_BCBAD);
   do {
   do {
     const char *buf;
     const char *buf;
-    size_t size;
-    if (ls->n) {  /* Copy remainder to buffer. */
-      if (ls->sb.n) {  /* Move down in buffer. */
-	lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
-	if (ls->n != ls->sb.n)
-	  memmove(ls->sb.buf, ls->p, ls->n);
+    size_t sz;
+    char *p = sbufB(&ls->sb);
+    MSize n = (MSize)(ls->pe - ls->p);
+    if (n) {  /* Copy remainder to buffer. */
+      if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
+	lua_assert(ls->pe == sbufP(&ls->sb));
+	if (ls->p != p) memmove(p, ls->p, n);
       } else {  /* Copy from buffer provided by reader. */
       } else {  /* Copy from buffer provided by reader. */
-	bcread_resize(ls, len);
-	memcpy(ls->sb.buf, ls->p, ls->n);
+	p = lj_buf_need(&ls->sb, len);
+	memcpy(p, ls->p, n);
       }
       }
-      ls->p = ls->sb.buf;
+      ls->p = p;
+      ls->pe = p + n;
     }
     }
-    ls->sb.n = ls->n;
-    buf = ls->rfunc(ls->L, ls->rdata, &size);  /* Get more data from reader. */
-    if (buf == NULL || size == 0) {  /* EOF? */
+    setsbufP(&ls->sb, p + n);
+    buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
+    if (buf == NULL || sz == 0) {  /* EOF? */
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
-      ls->current = -1;  /* Only bad if we get called again. */
+      ls->c = -1;  /* Only bad if we get called again. */
       break;
       break;
     }
     }
-    if (ls->sb.n) {  /* Append to buffer. */
-      MSize n = ls->sb.n + (MSize)size;
-      bcread_resize(ls, n < len ? len : n);
-      memcpy(ls->sb.buf + ls->sb.n, buf, size);
-      ls->n = ls->sb.n = n;
-      ls->p = ls->sb.buf;
+    if (n) {  /* Append to buffer. */
+      n += (MSize)sz;
+      p = lj_buf_need(&ls->sb, n < len ? len : n);
+      memcpy(sbufP(&ls->sb), buf, sz);
+      setsbufP(&ls->sb, p + n);
+      ls->p = p;
+      ls->pe = p + n;
     } else {  /* Return buffer provided by reader. */
     } else {  /* Return buffer provided by reader. */
-      ls->n = (MSize)size;
       ls->p = buf;
       ls->p = buf;
+      ls->pe = buf + sz;
     }
     }
-  } while (ls->n < len);
+  } while (ls->p + len > ls->pe);
 }
 }
 
 
 /* Need a certain number of bytes. */
 /* Need a certain number of bytes. */
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 {
 {
-  if (LJ_UNLIKELY(ls->n < len))
+  if (LJ_UNLIKELY(ls->p + len > ls->pe))
     bcread_fill(ls, len, 1);
     bcread_fill(ls, len, 1);
 }
 }
 
 
 /* Want to read up to a certain number of bytes, but may need less. */
 /* Want to read up to a certain number of bytes, but may need less. */
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 {
 {
-  if (LJ_UNLIKELY(ls->n < len))
+  if (LJ_UNLIKELY(ls->p + len > ls->pe))
     bcread_fill(ls, len, 0);
     bcread_fill(ls, len, 0);
 }
 }
 
 
-#define bcread_dec(ls)		check_exp(ls->n > 0, ls->n--)
-#define bcread_consume(ls, len)	check_exp(ls->n >= (len), ls->n -= (len))
-
 /* Return memory block from buffer. */
 /* Return memory block from buffer. */
-static uint8_t *bcread_mem(LexState *ls, MSize len)
+static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
 {
 {
   uint8_t *p = (uint8_t *)ls->p;
   uint8_t *p = (uint8_t *)ls->p;
-  bcread_consume(ls, len);
-  ls->p = (char *)p + len;
+  ls->p += len;
+  lua_assert(ls->p <= ls->pe);
   return p;
   return p;
 }
 }
 
 
@@ -128,25 +119,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
 /* Read byte from buffer. */
 /* Read byte from buffer. */
 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
 {
 {
-  bcread_dec(ls);
+  lua_assert(ls->p < ls->pe);
   return (uint32_t)(uint8_t)*ls->p++;
   return (uint32_t)(uint8_t)*ls->p++;
 }
 }
 
 
 /* Read ULEB128 value from buffer. */
 /* Read ULEB128 value from buffer. */
-static uint32_t bcread_uleb128(LexState *ls)
+static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
 {
 {
-  const uint8_t *p = (const uint8_t *)ls->p;
-  uint32_t v = *p++;
-  if (LJ_UNLIKELY(v >= 0x80)) {
-    int sh = 0;
-    v &= 0x7f;
-    do {
-     v |= ((*p & 0x7f) << (sh += 7));
-     bcread_dec(ls);
-   } while (*p++ >= 0x80);
-  }
-  bcread_dec(ls);
-  ls->p = (char *)p;
+  uint32_t v = lj_buf_ruleb128(&ls->p);
+  lua_assert(ls->p <= ls->pe);
   return v;
   return v;
 }
 }
 
 
@@ -160,11 +141,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
     v &= 0x3f;
     v &= 0x3f;
     do {
     do {
      v |= ((*p & 0x7f) << (sh += 7));
      v |= ((*p & 0x7f) << (sh += 7));
-     bcread_dec(ls);
    } while (*p++ >= 0x80);
    } while (*p++ >= 0x80);
   }
   }
-  bcread_dec(ls);
   ls->p = (char *)p;
   ls->p = (char *)p;
+  lua_assert(ls->p <= ls->pe);
   return v;
   return v;
 }
 }
 
 
@@ -212,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
     o->u32.hi = bcread_uleb128(ls);
     o->u32.hi = bcread_uleb128(ls);
   } else {
   } else {
     lua_assert(tp <= BCDUMP_KTAB_TRUE);
     lua_assert(tp <= BCDUMP_KTAB_TRUE);
-    setitype(o, ~tp);
+    setpriV(o, ~tp);
   }
   }
 }
 }
 
 
@@ -326,25 +306,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
 }
 }
 
 
 /* Read a prototype. */
 /* Read a prototype. */
-static GCproto *bcread_proto(LexState *ls)
+GCproto *lj_bcread_proto(LexState *ls)
 {
 {
   GCproto *pt;
   GCproto *pt;
   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
   MSize ofsk, ofsuv, ofsdbg;
   MSize ofsk, ofsuv, ofsdbg;
   MSize sizedbg = 0;
   MSize sizedbg = 0;
   BCLine firstline = 0, numline = 0;
   BCLine firstline = 0, numline = 0;
-  MSize len, startn;
-
-  /* Read length. */
-  if (ls->n > 0 && ls->p[0] == 0) {  /* Shortcut EOF. */
-    ls->n--; ls->p++;
-    return NULL;
-  }
-  bcread_want(ls, 5);
-  len = bcread_uleb128(ls);
-  if (!len) return NULL;  /* EOF */
-  bcread_need(ls, len);
-  startn = ls->n;
 
 
   /* Read prototype header. */
   /* Read prototype header. */
   flags = bcread_byte(ls);
   flags = bcread_byte(ls);
@@ -413,9 +381,6 @@ static GCproto *bcread_proto(LexState *ls)
     setmref(pt->uvinfo, NULL);
     setmref(pt->uvinfo, NULL);
     setmref(pt->varinfo, NULL);
     setmref(pt->varinfo, NULL);
   }
   }
-
-  if (len != startn - ls->n)
-    bcread_error(ls, LJ_ERR_BCBAD);
   return pt;
   return pt;
 }
 }
 
 
@@ -429,6 +394,7 @@ static int bcread_header(LexState *ls)
       bcread_byte(ls) != BCDUMP_VERSION) return 0;
       bcread_byte(ls) != BCDUMP_VERSION) return 0;
   bcread_flags(ls) = flags = bcread_uleb128(ls);
   bcread_flags(ls) = flags = bcread_uleb128(ls);
   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
+  if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
   if ((flags & BCDUMP_F_FFI)) {
   if ((flags & BCDUMP_F_FFI)) {
 #if LJ_HASFFI
 #if LJ_HASFFI
     lua_State *L = ls->L;
     lua_State *L = ls->L;
@@ -455,19 +421,34 @@ static int bcread_header(LexState *ls)
 GCproto *lj_bcread(LexState *ls)
 GCproto *lj_bcread(LexState *ls)
 {
 {
   lua_State *L = ls->L;
   lua_State *L = ls->L;
-  lua_assert(ls->current == BCDUMP_HEAD1);
+  lua_assert(ls->c == BCDUMP_HEAD1);
   bcread_savetop(L, ls, L->top);
   bcread_savetop(L, ls, L->top);
-  lj_str_resetbuf(&ls->sb);
+  lj_buf_reset(&ls->sb);
   /* Check for a valid bytecode dump header. */
   /* Check for a valid bytecode dump header. */
   if (!bcread_header(ls))
   if (!bcread_header(ls))
     bcread_error(ls, LJ_ERR_BCFMT);
     bcread_error(ls, LJ_ERR_BCFMT);
   for (;;) {  /* Process all prototypes in the bytecode dump. */
   for (;;) {  /* Process all prototypes in the bytecode dump. */
-    GCproto *pt = bcread_proto(ls);
-    if (!pt) break;
+    GCproto *pt;
+    MSize len;
+    const char *startp;
+    /* Read length. */
+    if (ls->p < ls->pe && ls->p[0] == 0) {  /* Shortcut EOF. */
+      ls->p++;
+      break;
+    }
+    bcread_want(ls, 5);
+    len = bcread_uleb128(ls);
+    if (!len) break;  /* EOF */
+    bcread_need(ls, len);
+    startp = ls->p;
+    pt = lj_bcread_proto(ls);
+    if (ls->p != startp + len)
+      bcread_error(ls, LJ_ERR_BCBAD);
     setprotoV(L, L->top, pt);
     setprotoV(L, L->top, pt);
     incr_top(L);
     incr_top(L);
   }
   }
-  if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls))
+  if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
+      L->top-1 != bcread_oldtop(L, ls))
     bcread_error(ls, LJ_ERR_BCBAD);
     bcread_error(ls, LJ_ERR_BCBAD);
   /* Pop off last prototype. */
   /* Pop off last prototype. */
   L->top--;
   L->top--;

+ 98 - 133
Source/ThirdParty/LuaJIT/src/lj_bcwrite.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Bytecode writer.
 ** Bytecode writer.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_bcwrite_c
 #define lj_bcwrite_c
@@ -8,7 +8,7 @@
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_gc.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
 #if LJ_HASFFI
 #if LJ_HASFFI
 #include "lj_ctype.h"
 #include "lj_ctype.h"
@@ -17,13 +17,13 @@
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_jit.h"
 #include "lj_jit.h"
 #endif
 #endif
+#include "lj_strfmt.h"
 #include "lj_bcdump.h"
 #include "lj_bcdump.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 
 
 /* Context for bytecode writer. */
 /* Context for bytecode writer. */
 typedef struct BCWriteCtx {
 typedef struct BCWriteCtx {
   SBuf sb;			/* Output buffer. */
   SBuf sb;			/* Output buffer. */
-  lua_State *L;			/* Lua state. */
   GCproto *pt;			/* Root prototype. */
   GCproto *pt;			/* Root prototype. */
   lua_Writer wfunc;		/* Writer callback. */
   lua_Writer wfunc;		/* Writer callback. */
   void *wdata;			/* Writer callback data. */
   void *wdata;			/* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
   int status;			/* Status from writer callback. */
   int status;			/* Status from writer callback. */
 } BCWriteCtx;
 } BCWriteCtx;
 
 
-/* -- Output buffer handling ---------------------------------------------- */
-
-/* Resize buffer if needed. */
-static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
-{
-  MSize sz = ctx->sb.sz * 2;
-  while (ctx->sb.n + len > sz) sz = sz * 2;
-  lj_str_resizebuf(ctx->L, &ctx->sb, sz);
-}
-
-/* Need a certain amount of buffer space. */
-static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
-{
-  if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
-    bcwrite_resize(ctx, len);
-}
-
-/* Add memory block to buffer. */
-static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
-{
-  uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
-  MSize i;
-  ctx->sb.n += len;
-  for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
-}
-
-/* Add byte to buffer. */
-static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
-{
-  ctx->sb.buf[ctx->sb.n++] = b;
-}
-
-/* Add ULEB128 value to buffer. */
-static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
-{
-  MSize n = ctx->sb.n;
-  uint8_t *p = (uint8_t *)ctx->sb.buf;
-  for (; v >= 0x80; v >>= 7)
-    p[n++] = (uint8_t)((v & 0x7f) | 0x80);
-  p[n++] = (uint8_t)v;
-  ctx->sb.n = n;
-}
-
 /* -- Bytecode writer ----------------------------------------------------- */
 /* -- Bytecode writer ----------------------------------------------------- */
 
 
 /* Write a single constant key/value of a template table. */
 /* Write a single constant key/value of a template table. */
 static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
 static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
 {
 {
-  bcwrite_need(ctx, 1+10);
+  char *p = lj_buf_more(&ctx->sb, 1+10);
   if (tvisstr(o)) {
   if (tvisstr(o)) {
     const GCstr *str = strV(o);
     const GCstr *str = strV(o);
     MSize len = str->len;
     MSize len = str->len;
-    bcwrite_need(ctx, 5+len);
-    bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len);
-    bcwrite_block(ctx, strdata(str), len);
+    p = lj_buf_more(&ctx->sb, 5+len);
+    p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
+    p = lj_buf_wmem(p, strdata(str), len);
   } else if (tvisint(o)) {
   } else if (tvisint(o)) {
-    bcwrite_byte(ctx, BCDUMP_KTAB_INT);
-    bcwrite_uleb128(ctx, intV(o));
+    *p++ = BCDUMP_KTAB_INT;
+    p = lj_strfmt_wuleb128(p, intV(o));
   } else if (tvisnum(o)) {
   } else if (tvisnum(o)) {
     if (!LJ_DUALNUM && narrow) {  /* Narrow number constants to integers. */
     if (!LJ_DUALNUM && narrow) {  /* Narrow number constants to integers. */
       lua_Number num = numV(o);
       lua_Number num = numV(o);
       int32_t k = lj_num2int(num);
       int32_t k = lj_num2int(num);
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
-	bcwrite_byte(ctx, BCDUMP_KTAB_INT);
-	bcwrite_uleb128(ctx, k);
+	*p++ = BCDUMP_KTAB_INT;
+	p = lj_strfmt_wuleb128(p, k);
+	setsbufP(&ctx->sb, p);
 	return;
 	return;
       }
       }
     }
     }
-    bcwrite_byte(ctx, BCDUMP_KTAB_NUM);
-    bcwrite_uleb128(ctx, o->u32.lo);
-    bcwrite_uleb128(ctx, o->u32.hi);
+    *p++ = BCDUMP_KTAB_NUM;
+    p = lj_strfmt_wuleb128(p, o->u32.lo);
+    p = lj_strfmt_wuleb128(p, o->u32.hi);
   } else {
   } else {
     lua_assert(tvispri(o));
     lua_assert(tvispri(o));
-    bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o));
+    *p++ = BCDUMP_KTAB_NIL+~itype(o);
   }
   }
+  setsbufP(&ctx->sb, p);
 }
 }
 
 
 /* Write a template table. */
 /* Write a template table. */
-static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
+static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
 {
 {
   MSize narray = 0, nhash = 0;
   MSize narray = 0, nhash = 0;
   if (t->asize > 0) {  /* Determine max. length of array part. */
   if (t->asize > 0) {  /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
       nhash += !tvisnil(&node[i].val);
       nhash += !tvisnil(&node[i].val);
   }
   }
   /* Write number of array slots and hash slots. */
   /* Write number of array slots and hash slots. */
-  bcwrite_uleb128(ctx, narray);
-  bcwrite_uleb128(ctx, nhash);
+  p = lj_strfmt_wuleb128(p, narray);
+  p = lj_strfmt_wuleb128(p, nhash);
+  setsbufP(&ctx->sb, p);
   if (narray) {  /* Write array entries (may contain nil). */
   if (narray) {  /* Write array entries (may contain nil). */
     MSize i;
     MSize i;
     TValue *o = tvref(t->array);
     TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
   for (i = 0; i < sizekgc; i++, kr++) {
   for (i = 0; i < sizekgc; i++, kr++) {
     GCobj *o = gcref(*kr);
     GCobj *o = gcref(*kr);
     MSize tp, need = 1;
     MSize tp, need = 1;
+    char *p;
     /* Determine constant type and needed size. */
     /* Determine constant type and needed size. */
     if (o->gch.gct == ~LJ_TSTR) {
     if (o->gch.gct == ~LJ_TSTR) {
       tp = BCDUMP_KGC_STR + gco2str(o)->len;
       tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
       need = 1+2*5;
       need = 1+2*5;
     }
     }
     /* Write constant type. */
     /* Write constant type. */
-    bcwrite_need(ctx, need);
-    bcwrite_uleb128(ctx, tp);
+    p = lj_buf_more(&ctx->sb, need);
+    p = lj_strfmt_wuleb128(p, tp);
     /* Write constant data (if any). */
     /* Write constant data (if any). */
     if (tp >= BCDUMP_KGC_STR) {
     if (tp >= BCDUMP_KGC_STR) {
-      bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len);
+      p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
     } else if (tp == BCDUMP_KGC_TAB) {
     } else if (tp == BCDUMP_KGC_TAB) {
-      bcwrite_ktab(ctx, gco2tab(o));
+      bcwrite_ktab(ctx, p, gco2tab(o));
+      continue;
 #if LJ_HASFFI
 #if LJ_HASFFI
     } else if (tp != BCDUMP_KGC_CHILD) {
     } else if (tp != BCDUMP_KGC_CHILD) {
-      cTValue *p = (TValue *)cdataptr(gco2cd(o));
-      bcwrite_uleb128(ctx, p[0].u32.lo);
-      bcwrite_uleb128(ctx, p[0].u32.hi);
+      cTValue *q = (TValue *)cdataptr(gco2cd(o));
+      p = lj_strfmt_wuleb128(p, q[0].u32.lo);
+      p = lj_strfmt_wuleb128(p, q[0].u32.hi);
       if (tp == BCDUMP_KGC_COMPLEX) {
       if (tp == BCDUMP_KGC_COMPLEX) {
-	bcwrite_uleb128(ctx, p[1].u32.lo);
-	bcwrite_uleb128(ctx, p[1].u32.hi);
+	p = lj_strfmt_wuleb128(p, q[1].u32.lo);
+	p = lj_strfmt_wuleb128(p, q[1].u32.hi);
       }
       }
 #endif
 #endif
     }
     }
+    setsbufP(&ctx->sb, p);
   }
   }
 }
 }
 
 
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
 {
 {
   MSize i, sizekn = pt->sizekn;
   MSize i, sizekn = pt->sizekn;
   cTValue *o = mref(pt->k, TValue);
   cTValue *o = mref(pt->k, TValue);
-  bcwrite_need(ctx, 10*sizekn);
+  char *p = lj_buf_more(&ctx->sb, 10*sizekn);
   for (i = 0; i < sizekn; i++, o++) {
   for (i = 0; i < sizekn; i++, o++) {
     int32_t k;
     int32_t k;
     if (tvisint(o)) {
     if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
 	k = lj_num2int(num);
 	k = lj_num2int(num);
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	save_int:
 	save_int:
-	  bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u));
-	  if (k < 0) {
-	    char *p = &ctx->sb.buf[ctx->sb.n-1];
-	    *p = (*p & 7) | ((k>>27) & 0x18);
-	  }
+	  p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
+	  if (k < 0)
+	    p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
 	  continue;
 	  continue;
 	}
 	}
       }
       }
-      bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
-      if (o->u32.lo >= 0x80000000u) {
-	char *p = &ctx->sb.buf[ctx->sb.n-1];
-	*p = (*p & 7) | ((o->u32.lo>>27) & 0x18);
-      }
-      bcwrite_uleb128(ctx, o->u32.hi);
+      p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
+      if (o->u32.lo >= 0x80000000u)
+	p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
+      p = lj_strfmt_wuleb128(p, o->u32.hi);
     }
     }
   }
   }
+  setsbufP(&ctx->sb, p);
 }
 }
 
 
 /* Write bytecode instructions. */
 /* Write bytecode instructions. */
-static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt)
+static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
 {
 {
   MSize nbc = pt->sizebc-1;  /* Omit the [JI]FUNC* header. */
   MSize nbc = pt->sizebc-1;  /* Omit the [JI]FUNC* header. */
 #if LJ_HASJIT
 #if LJ_HASJIT
-  uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n];
+  uint8_t *q = (uint8_t *)p;
 #endif
 #endif
-  bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+  p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+  UNUSED(ctx);
 #if LJ_HASJIT
 #if LJ_HASJIT
   /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
   /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
   if ((pt->flags & PROTO_ILOOP) || pt->trace) {
   if ((pt->flags & PROTO_ILOOP) || pt->trace) {
-    jit_State *J = L2J(ctx->L);
+    jit_State *J = L2J(sbufL(&ctx->sb));
     MSize i;
     MSize i;
-    for (i = 0; i < nbc; i++, p += sizeof(BCIns)) {
-      BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)];
+    for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
+      BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
       if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
       if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
 	  op == BC_JFORI) {
 	  op == BC_JFORI) {
-	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
+	q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
       } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
       } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
-	BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8);
+	BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
 	BCIns ins = traceref(J, rd)->startins;
 	BCIns ins = traceref(J, rd)->startins;
-	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
-	p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
-	p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
+	q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
+	q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
+	q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
       }
       }
     }
     }
   }
   }
 #endif
 #endif
+  return p;
 }
 }
 
 
 /* Write prototype. */
 /* Write prototype. */
 static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 {
 {
   MSize sizedbg = 0;
   MSize sizedbg = 0;
+  char *p;
 
 
   /* Recursively write children of prototype. */
   /* Recursively write children of prototype. */
   if ((pt->flags & PROTO_CHILD)) {
   if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
   }
   }
 
 
   /* Start writing the prototype info to a buffer. */
   /* Start writing the prototype info to a buffer. */
-  lj_str_resetbuf(&ctx->sb);
-  ctx->sb.n = 5;  /* Leave room for final size. */
-  bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+  p = lj_buf_need(&ctx->sb,
+		  5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+  p += 5;  /* Leave room for final size. */
 
 
   /* Write prototype header. */
   /* Write prototype header. */
-  bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)));
-  bcwrite_byte(ctx, pt->numparams);
-  bcwrite_byte(ctx, pt->framesize);
-  bcwrite_byte(ctx, pt->sizeuv);
-  bcwrite_uleb128(ctx, pt->sizekgc);
-  bcwrite_uleb128(ctx, pt->sizekn);
-  bcwrite_uleb128(ctx, pt->sizebc-1);
+  *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
+  *p++ = pt->numparams;
+  *p++ = pt->framesize;
+  *p++ = pt->sizeuv;
+  p = lj_strfmt_wuleb128(p, pt->sizekgc);
+  p = lj_strfmt_wuleb128(p, pt->sizekn);
+  p = lj_strfmt_wuleb128(p, pt->sizebc-1);
   if (!ctx->strip) {
   if (!ctx->strip) {
     if (proto_lineinfo(pt))
     if (proto_lineinfo(pt))
       sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
       sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
-    bcwrite_uleb128(ctx, sizedbg);
+    p = lj_strfmt_wuleb128(p, sizedbg);
     if (sizedbg) {
     if (sizedbg) {
-      bcwrite_uleb128(ctx, pt->firstline);
-      bcwrite_uleb128(ctx, pt->numline);
+      p = lj_strfmt_wuleb128(p, pt->firstline);
+      p = lj_strfmt_wuleb128(p, pt->numline);
     }
     }
   }
   }
 
 
   /* Write bytecode instructions and upvalue refs. */
   /* Write bytecode instructions and upvalue refs. */
-  bcwrite_bytecode(ctx, pt);
-  bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2);
+  p = bcwrite_bytecode(ctx, p, pt);
+  p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
+  setsbufP(&ctx->sb, p);
 
 
   /* Write constants. */
   /* Write constants. */
   bcwrite_kgc(ctx, pt);
   bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 
 
   /* Write debug info, if not stripped. */
   /* Write debug info, if not stripped. */
   if (sizedbg) {
   if (sizedbg) {
-    bcwrite_need(ctx, sizedbg);
-    bcwrite_block(ctx, proto_lineinfo(pt), sizedbg);
+    p = lj_buf_more(&ctx->sb, sizedbg);
+    p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
+    setsbufP(&ctx->sb, p);
   }
   }
 
 
   /* Pass buffer to writer function. */
   /* Pass buffer to writer function. */
   if (ctx->status == 0) {
   if (ctx->status == 0) {
-    MSize n = ctx->sb.n - 5;
+    MSize n = sbuflen(&ctx->sb) - 5;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
-    ctx->sb.n = 5 - nn;
-    bcwrite_uleb128(ctx, n);  /* Fill in final size. */
-    lua_assert(ctx->sb.n == 5);
-    ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata);
+    char *q = sbufB(&ctx->sb) + (5 - nn);
+    p = lj_strfmt_wuleb128(q, n);  /* Fill in final size. */
+    lua_assert(p == sbufB(&ctx->sb) + 5);
+    ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
   }
   }
 }
 }
 
 
@@ -338,20 +303,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
   GCstr *chunkname = proto_chunkname(ctx->pt);
   GCstr *chunkname = proto_chunkname(ctx->pt);
   const char *name = strdata(chunkname);
   const char *name = strdata(chunkname);
   MSize len = chunkname->len;
   MSize len = chunkname->len;
-  lj_str_resetbuf(&ctx->sb);
-  bcwrite_need(ctx, 5+5+len);
-  bcwrite_byte(ctx, BCDUMP_HEAD1);
-  bcwrite_byte(ctx, BCDUMP_HEAD2);
-  bcwrite_byte(ctx, BCDUMP_HEAD3);
-  bcwrite_byte(ctx, BCDUMP_VERSION);
-  bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) +
-		   (LJ_BE ? BCDUMP_F_BE : 0) +
-		   ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
+  char *p = lj_buf_need(&ctx->sb, 5+5+len);
+  *p++ = BCDUMP_HEAD1;
+  *p++ = BCDUMP_HEAD2;
+  *p++ = BCDUMP_HEAD3;
+  *p++ = BCDUMP_VERSION;
+  *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
+	 LJ_BE*BCDUMP_F_BE +
+	 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
+	 LJ_FR2*BCDUMP_F_FR2;
   if (!ctx->strip) {
   if (!ctx->strip) {
-    bcwrite_uleb128(ctx, len);
-    bcwrite_block(ctx, name, len);
+    p = lj_strfmt_wuleb128(p, len);
+    p = lj_buf_wmem(p, name, len);
   }
   }
-  ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata);
+  ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
+			   (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
 }
 }
 
 
 /* Write footer of bytecode dump. */
 /* Write footer of bytecode dump. */
@@ -359,7 +325,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
 {
 {
   if (ctx->status == 0) {
   if (ctx->status == 0) {
     uint8_t zero = 0;
     uint8_t zero = 0;
-    ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata);
+    ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
   }
   }
 }
 }
 
 
@@ -367,8 +333,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
 static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
 static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
 {
 {
   BCWriteCtx *ctx = (BCWriteCtx *)ud;
   BCWriteCtx *ctx = (BCWriteCtx *)ud;
-  UNUSED(dummy);
-  lj_str_resizebuf(L, &ctx->sb, 1024);  /* Avoids resize for most prototypes. */
+  UNUSED(L); UNUSED(dummy);
+  lj_buf_need(&ctx->sb, 1024);  /* Avoids resize for most prototypes. */
   bcwrite_header(ctx);
   bcwrite_header(ctx);
   bcwrite_proto(ctx, ctx->pt);
   bcwrite_proto(ctx, ctx->pt);
   bcwrite_footer(ctx);
   bcwrite_footer(ctx);
@@ -381,16 +347,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
 {
 {
   BCWriteCtx ctx;
   BCWriteCtx ctx;
   int status;
   int status;
-  ctx.L = L;
   ctx.pt = pt;
   ctx.pt = pt;
   ctx.wfunc = writer;
   ctx.wfunc = writer;
   ctx.wdata = data;
   ctx.wdata = data;
   ctx.strip = strip;
   ctx.strip = strip;
   ctx.status = 0;
   ctx.status = 0;
-  lj_str_initbuf(&ctx.sb);
+  lj_buf_init(L, &ctx.sb);
   status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
   status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
   if (status == 0) status = ctx.status;
   if (status == 0) status = ctx.status;
-  lj_str_freebuf(G(ctx.L), &ctx.sb);
+  lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
   return status;
   return status;
 }
 }
 
 

+ 234 - 0
Source/ThirdParty/LuaJIT/src/lj_buf.c

@@ -0,0 +1,234 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_buf_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_strfmt.h"
+
+/* -- Buffer management --------------------------------------------------- */
+
+static void buf_grow(SBuf *sb, MSize sz)
+{
+  MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
+  char *b;
+  if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
+  while (nsz < sz) nsz += nsz;
+  b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
+  setmref(sb->b, b);
+  setmref(sb->p, b + len);
+  setmref(sb->e, b + nsz);
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
+{
+  lua_assert(sz > sbufsz(sb));
+  if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
+    lj_err_mem(sbufL(sb));
+  buf_grow(sb, sz);
+  return sbufB(sb);
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
+{
+  MSize len = sbuflen(sb);
+  lua_assert(sz > sbufleft(sb));
+  if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+    lj_err_mem(sbufL(sb));
+  buf_grow(sb, len + sz);
+  return sbufP(sb);
+}
+
+void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
+{
+  char *b = sbufB(sb);
+  MSize osz = (MSize)(sbufE(sb) - b);
+  if (osz > 2*LJ_MIN_SBUF) {
+    MSize n = (MSize)(sbufP(sb) - b);
+    b = lj_mem_realloc(L, b, osz, (osz >> 1));
+    setmref(sb->b, b);
+    setmref(sb->p, b + n);
+    setmref(sb->e, b + (osz >> 1));
+  }
+}
+
+char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
+{
+  SBuf *sb = &G(L)->tmpbuf;
+  setsbufL(sb, L);
+  return lj_buf_need(sb, sz);
+}
+
+/* -- Low-level buffer put operations ------------------------------------- */
+
+SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
+{
+  char *p = lj_buf_more(sb, len);
+  p = lj_buf_wmem(p, q, len);
+  setsbufP(sb, p);
+  return sb;
+}
+
+#if LJ_HASJIT
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+{
+  char *p = lj_buf_more(sb, 1);
+  *p++ = (char)c;
+  setsbufP(sb, p);
+  return sb;
+}
+#endif
+
+SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len);
+  p = lj_buf_wmem(p, strdata(s), len);
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* -- High-level buffer put operations ------------------------------------ */
+
+SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s)+len-1;
+  while (p < e)
+    *p++ = *q--;
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s);
+  for (; p < e; p++, q++) {
+    uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+    *p = c + ((c >= 'A' && c <= 'Z') << 5);
+#else
+    if (c >= 'A' && c <= 'Z') c += 0x20;
+    *p = c;
+#endif
+  }
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s);
+  for (; p < e; p++, q++) {
+    uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+    *p = c - ((c >= 'a' && c <= 'z') << 5);
+#else
+    if (c >= 'a' && c <= 'z') c -= 0x20;
+    *p = c;
+#endif
+  }
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
+{
+  MSize len = s->len;
+  if (rep > 0 && len) {
+    uint64_t tlen = (uint64_t)rep * len;
+    char *p;
+    if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
+      lj_err_mem(sbufL(sb));
+    p = lj_buf_more(sb, (MSize)tlen);
+    if (len == 1) {  /* Optimize a common case. */
+      uint32_t c = strdata(s)[0];
+      do { *p++ = c; } while (--rep > 0);
+    } else {
+      const char *e = strdata(s) + len;
+      do {
+	const char *q = strdata(s);
+	do { *p++ = *q++; } while (q < e);
+      } while (--rep > 0);
+    }
+    setsbufP(sb, p);
+  }
+  return sb;
+}
+
+SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
+{
+  MSize seplen = sep ? sep->len : 0;
+  if (i <= e) {
+    for (;;) {
+      cTValue *o = lj_tab_getint(t, i);
+      char *p;
+      if (!o) {
+      badtype:  /* Error: bad element type. */
+	setsbufP(sb, (void *)(intptr_t)i);  /* Store failing index. */
+	return NULL;
+      } else if (tvisstr(o)) {
+	MSize len = strV(o)->len;
+	p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+      } else if (tvisint(o)) {
+	p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+      } else if (tvisnum(o)) {
+	p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o);
+      } else {
+	goto badtype;
+      }
+      if (i++ == e) {
+	setsbufP(sb, p);
+	break;
+      }
+      if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
+      setsbufP(sb, p);
+    }
+  }
+  return sb;
+}
+
+/* -- Miscellaneous buffer operations ------------------------------------- */
+
+GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
+{
+  return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
+}
+
+/* Concatenate two strings. */
+GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
+{
+  MSize len1 = s1->len, len2 = s2->len;
+  char *buf = lj_buf_tmp(L, len1 + len2);
+  memcpy(buf, strdata(s1), len1);
+  memcpy(buf+len1, strdata(s2), len2);
+  return lj_str_new(L, buf, len1 + len2);
+}
+
+/* Read ULEB128 from buffer. */
+uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
+{
+  const uint8_t *p = (const uint8_t *)*pp;
+  uint32_t v = *p++;
+  if (LJ_UNLIKELY(v >= 0x80)) {
+    int sh = 0;
+    v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+  }
+  *pp = (const char *)p;
+  return v;
+}
+

+ 105 - 0
Source/ThirdParty/LuaJIT/src/lj_buf.h

@@ -0,0 +1,105 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BUF_H
+#define _LJ_BUF_H
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_str.h"
+
+/* Resizable string buffers. Struct definition in lj_obj.h.
+**
+** The b, p and e fields are read as char pointers and L as a lua_State
+** pointer. Derived sizes: sbufsz is e - b, sbuflen is p - b and sbufleft
+** is e - p. (Presumably b = buffer base, p = write position, e = end of
+** the allocation -- confirm against the struct definition.)
+*/
+#define sbufB(sb)	(mref((sb)->b, char))
+#define sbufP(sb)	(mref((sb)->p, char))
+#define sbufE(sb)	(mref((sb)->e, char))
+#define sbufL(sb)	(mref((sb)->L, lua_State))
+#define sbufsz(sb)	((MSize)(sbufE((sb)) - sbufB((sb))))
+#define sbuflen(sb)	((MSize)(sbufP((sb)) - sbufB((sb))))
+#define sbufleft(sb)	((MSize)(sbufE((sb)) - sbufP((sb))))
+#define setsbufP(sb, q)	(setmref((sb)->p, (q)))
+#define setsbufL(sb, l)	(setmref((sb)->L, (l)))
+
+/* Buffer management */
+LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
+LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
+LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
+LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
+
+/* Bind a buffer to a Lua state and clear its b, p and e pointers. */
+static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
+{
+  setmref(sb->b, NULL);
+  setmref(sb->e, NULL);
+  setmref(sb->p, NULL);
+  setsbufL(sb, L);
+}
+
+static LJ_AINLINE void lj_buf_reset(SBuf *sb)
+{
+  setmrefr(sb->p, sb->b);  /* Presumably p = b, so sbuflen(sb) becomes 0. */
+}
+
+/* Get the global temporary buffer, reset and bound to the given state. */
+static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
+{
+  SBuf *tmp = &G(L)->tmpbuf;
+  lj_buf_reset(tmp);
+  setsbufL(tmp, L);
+  return tmp;
+}
+
+/* Release the buffer's memory block (b up to e) via lj_mem_free(). */
+static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
+{
+  char *base = sbufB(sb);
+  MSize cap = sbufsz(sb);
+  lj_mem_free(g, base, cap);
+}
+
+/*
+** Return the buffer base. Calls lj_buf_need2() first if the total buffer
+** size is smaller than sz.
+*/
+static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
+{
+  return LJ_UNLIKELY(sz > sbufsz(sb)) ? lj_buf_need2(sb, sz) : sbufB(sb);
+}
+
+/*
+** Return the current write position. Calls lj_buf_more2() first if fewer
+** than sz bytes are left between p and e.
+*/
+static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
+{
+  if (!LJ_UNLIKELY(sz > sbufleft(sb)))
+    return sbufP(sb);
+  return lj_buf_more2(sb, sz);
+}
+
+/* Low-level buffer put operations */
+LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+#if LJ_HASJIT
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+#endif
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
+
+/* Copy len bytes from q to p and return the position just past the copy. */
+static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
+{
+  memcpy(p, q, len);
+  return p + len;
+}
+
+/* Append a single byte to the buffer, making room for it if needed. */
+static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
+{
+  char *dst = lj_buf_more(sb, 1);
+  dst[0] = (char)c;
+  setsbufP(sb, dst + 1);
+}
+
+/* High-level buffer put operations */
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
+LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
+LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
+			    int32_t i, int32_t e);
+
+/* Miscellaneous buffer operations */
+LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
+LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
+LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
+
+/* Create a string object for state L from the buffer contents (b up to p). */
+static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
+{
+  MSize len = sbuflen(sb);
+  return lj_str_new(L, sbufB(sb), len);
+}
+
+#endif

+ 82 - 4
Source/ThirdParty/LuaJIT/src/lj_carith.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** C data arithmetic.
 ** C data arithmetic.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -11,10 +11,12 @@
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_meta.h"
+#include "lj_ir.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
 #include "lj_cdata.h"
 #include "lj_cdata.h"
 #include "lj_carith.h"
 #include "lj_carith.h"
+#include "lj_strscan.h"
 
 
 /* -- C data arithmetic --------------------------------------------------- */
 /* -- C data arithmetic --------------------------------------------------- */
 
 
@@ -62,7 +64,7 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
       TValue *o2 = i == 0 ? o+1 : o-1;
       TValue *o2 = i == 0 ? o+1 : o-1;
       CType *ct = ctype_raw(cts, cdataV(o2)->ctypeid);
       CType *ct = ctype_raw(cts, cdataV(o2)->ctypeid);
       ca->ct[i] = NULL;
       ca->ct[i] = NULL;
-      ca->p[i] = NULL;
+      ca->p[i] = (uint8_t *)strVdata(o);
       ok = 0;
       ok = 0;
       if (ctype_isenum(ct->info)) {
       if (ctype_isenum(ct->info)) {
 	CTSize ofs;
 	CTSize ofs;
@@ -79,7 +81,7 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
       }
       }
     } else {
     } else {
       ca->ct[i] = NULL;
       ca->ct[i] = NULL;
-      ca->p[i] = NULL;
+      ca->p[i] = (void *)(intptr_t)1;  /* To make it unequal. */
       ok = 0;
       ok = 0;
     }
     }
   }
   }
@@ -234,7 +236,9 @@ static int lj_carith_meta(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
     const char *repr[2];
     const char *repr[2];
     int i, isenum = -1, isstr = -1;
     int i, isenum = -1, isstr = -1;
     if (mm == MM_eq) {  /* Equality checks never raise an error. */
     if (mm == MM_eq) {  /* Equality checks never raise an error. */
-      setboolV(L->top-1, 0);
+      int eq = ca->p[0] == ca->p[1];
+      setboolV(L->top-1, eq);
+      setboolV(&G(L)->tmptv2, eq);  /* Remember for trace recorder. */
       return 1;
       return 1;
     }
     }
     for (i = 0; i < 2; i++) {
     for (i = 0; i < 2; i++) {
@@ -270,6 +274,80 @@ int lj_carith_op(lua_State *L, MMS mm)
   return lj_carith_meta(L, cts, &ca, mm);
   return lj_carith_meta(L, cts, &ca, mm);
 }
 }
 
 
+/* -- 64 bit bit operations helpers --------------------------------------- */
+
+#if LJ_64
+#define B64DEF(name) \
+  static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
+#else
+/* Not inlined on 32 bit archs, since some of these are quite lengthy.
+** The definitions are non-static there, so they can be called from
+** outside this file.
+*/
+#define B64DEF(name) \
+  uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
+#endif
+
+B64DEF(shl64) { return x << (sh&63); }  /* Shift left, count mod 64. */
+B64DEF(shr64) { return x >> (sh&63); }  /* Unsigned shift right, count mod 64. */
+B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }  /* Shift of the signed value. */
+B64DEF(rol64) { return lj_rol(x, (sh&63)); }  /* Via lj_rol(), count mod 64. */
+B64DEF(ror64) { return lj_ror(x, (sh&63)); }  /* Via lj_ror(), count mod 64. */
+
+#undef B64DEF
+
+/*
+** Apply a 64 bit shift or rotate to x. The operation is selected by op,
+** which is an IR shift opcode relative to IR_BSHL.
+*/
+uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
+{
+  switch (op) {
+  case IR_BSHL-IR_BSHL: return lj_carith_shl64(x, sh);
+  case IR_BSHR-IR_BSHL: return lj_carith_shr64(x, sh);
+  case IR_BSAR-IR_BSHL: return lj_carith_sar64(x, sh);
+  case IR_BROL-IR_BSHL: return lj_carith_rol64(x, sh);
+  case IR_BROR-IR_BSHL: return lj_carith_ror64(x, sh);
+  default: break;
+  }
+  lua_assert(0);  /* Unknown op: x is passed through unchanged. */
+  return x;
+}
+
+/* Equivalent to lj_lib_checkbit(), but handles cdata.
+**
+** Fetches argument narg (1-based, relative to L->base) as a 64 bit value.
+** For cdata arguments the value is converted to the type selected in *id:
+** *id is set to CTID_UINT64 for an unsigned 8 byte integer argument, else
+** to CTID_INT64 unless *id is already non-zero. For numbers and strings
+** that scan as numbers, the 32 bit result is returned zero-extended and
+** *id is left untouched. A missing or unsuitable argument is reported via
+** lj_err_argt() with LUA_TNUMBER.
+*/
+uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
+{
+  TValue *o = L->base + narg-1;
+  if (o >= L->top) {  /* Argument is missing. */
+  err:
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  } else if (LJ_LIKELY(tvisnumber(o))) {
+    /* Handled below. */
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(L);
+    uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
+    CTypeID sid = cdataV(o)->ctypeid;
+    CType *s = ctype_get(cts, sid);
+    uint64_t x;
+    if (ctype_isref(s->info)) {
+      sp = *(void **)sp;  /* Follow the reference to the actual data. */
+      sid = ctype_cid(s->info);
+    }
+    s = ctype_raw(cts, sid);
+    if (ctype_isenum(s->info)) s = ctype_child(cts, s);  /* Use the enum's child type. */
+    if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
+      *id = CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    else if (!*id)
+      *id = CTID_INT64;  /* Use int64_t, unless already set. */
+    lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
+		   (uint8_t *)&x, sp, CCF_ARG(narg));
+    return x;
+  } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
+    goto err;  /* Neither a number, cdata nor a string that scans as a number. */
+  }
+  if (LJ_LIKELY(tvisint(o))) {
+    return (uint32_t)intV(o);  /* Zero-extended to 64 bits. */
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);  /* Store the integer back into the slot. */
+    return (uint32_t)i;
+  }
+}
+
+
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 
 
 #if LJ_32 && LJ_HASJIT
 #if LJ_32 && LJ_HASJIT

+ 11 - 1
Source/ThirdParty/LuaJIT/src/lj_carith.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** C data arithmetic.
 ** C data arithmetic.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CARITH_H
 #ifndef _LJ_CARITH_H
@@ -12,6 +12,16 @@
 
 
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 
 
+#if LJ_32
+LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
+#endif
+LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
+LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
+
 #if LJ_32 && LJ_HASJIT
 #if LJ_32 && LJ_HASJIT
 LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
 LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
 #endif
 #endif

+ 158 - 51
Source/ThirdParty/LuaJIT/src/lj_ccall.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI C call handling.
 ** FFI C call handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -9,7 +9,6 @@
 
 
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
@@ -291,6 +290,75 @@
 #define CCALL_HANDLE_RET \
 #define CCALL_HANDLE_RET \
   if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
   if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
 
 
+#elif LJ_TARGET_ARM64
+/* -- ARM64 calling conventions ------------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = !ccall_classify_struct(cts, ctr);  /* Class 0: by reference. */ \
+  if (cc->retref) cc->retp = dp;  /* Hand the result address to the callee. */
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  unsigned int cl = ccall_classify_struct(cts, ctr); \
+  if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
+    CTSize i = (cl >> 8) - 1;  /* Element count is in bits 8 and up. */ \
+    do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \
+  } else { \
+    if (cl > 1) sp = (uint8_t *)&cc->fpr[0];  /* 8 byte HFA: copy from FPRs. */ \
+    memcpy(dp, sp, ctr->size); \
+  }
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in one or two FPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_STRUCTARG \
+  unsigned int cl = ccall_classify_struct(cts, d); \
+  if (cl == 0) {  /* Pass struct by reference. */ \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } else if (cl > 1) {  /* Pass struct in FPRs or on stack. */ \
+    isfp = (cl & 4) ? 2 : 1;  /* 2: 4 byte elements, one FPR per element. */ \
+  }  /* else: Pass struct in GPRs or on stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in separate (!) FPRs or on stack. */ \
+  /* Classify by the argument type d, not by the return type ctr. */ \
+  isfp = d->size == 2*sizeof(float) ? 2 : 1;
+
+#define CCALL_HANDLE_REGARG \
+  if (LJ_TARGET_IOS && isva) { \
+    /* IOS: All variadic arguments are on the stack. */ \
+  } else if (isfp) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : n*isfp;  /* FPRs needed. */ \
+    if (nfpr + n2 <= CCALL_NARG_FPR) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } else { \
+      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
+      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi;  /* Small stack arg. */ \
+    } \
+  } else {  /* Try to pass argument in GPRs. */ \
+    if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+    if (ngpr + n <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } else { \
+      ngpr = maxgpr;  /* Prevent reordering. */ \
+      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi;  /* Small stack arg. */ \
+    } \
+  }
+
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 /* -- PPC calling conventions --------------------------------------------- */
 /* -- PPC calling conventions --------------------------------------------- */
 
 
@@ -339,42 +407,6 @@
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ctr = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */
     ctr = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */
 
 
-#elif LJ_TARGET_PPCSPE
-/* -- PPC/SPE calling conventions ----------------------------------------- */
-
-#define CCALL_HANDLE_STRUCTRET \
-  cc->retref = 1;  /* Return all structs by reference. */ \
-  cc->gpr[ngpr++] = (GPRArg)dp;
-
-#define CCALL_HANDLE_COMPLEXRET \
-  /* Complex values are returned in 2 or 4 GPRs. */ \
-  cc->retref = 0;
-
-#define CCALL_HANDLE_COMPLEXRET2 \
-  memcpy(dp, sp, ctr->size);  /* Copy complex from GPRs. */
-
-#define CCALL_HANDLE_STRUCTARG \
-  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-  sz = CTSIZE_PTR;  /* Pass all structs by reference. */
-
-#define CCALL_HANDLE_COMPLEXARG \
-  /* Pass complex by value in 2 or 4 GPRs. */
-
-/* PPC/SPE has a softfp ABI. */
-#define CCALL_HANDLE_REGARG \
-  if (n > 1) {  /* Doesn't fit in a single GPR? */ \
-    lua_assert(n == 2 || n == 4);  /* int64_t, double or complex (float). */ \
-    if (n == 2) \
-      ngpr = (ngpr + 1u) & ~1u;  /* Only align 64 bit value to regpair. */ \
-    else if (ngpr + n > maxgpr) \
-      ngpr = maxgpr;  /* Prevent reordering. */ \
-  } \
-  if (ngpr + n <= maxgpr) { \
-    dp = &cc->gpr[ngpr]; \
-    ngpr += n; \
-    goto done; \
-  }
-
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 /* -- MIPS calling conventions -------------------------------------------- */
 /* -- MIPS calling conventions -------------------------------------------- */
 
 
@@ -386,6 +418,18 @@
   /* Complex values are returned in 1 or 2 FPRs. */ \
   /* Complex values are returned in 1 or 2 FPRs. */ \
   cc->retref = 0;
   cc->retref = 0;
 
 
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from GPRs. */ \
+    ((intptr_t *)dp)[0] = cc->gpr[0]; \
+    ((intptr_t *)dp)[1] = cc->gpr[1]; \
+  } else {  /* Copy complex double from GPRs. */ \
+    ((intptr_t *)dp)[0] = cc->gpr[0]; \
+    ((intptr_t *)dp)[1] = cc->gpr[1]; \
+    ((intptr_t *)dp)[2] = cc->gpr[2]; \
+    ((intptr_t *)dp)[3] = cc->gpr[3]; \
+  }
+#else
 #define CCALL_HANDLE_COMPLEXRET2 \
 #define CCALL_HANDLE_COMPLEXRET2 \
   if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
   if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
     ((float *)dp)[0] = cc->fpr[0].f; \
     ((float *)dp)[0] = cc->fpr[0].f; \
@@ -394,6 +438,7 @@
     ((double *)dp)[0] = cc->fpr[0].d; \
     ((double *)dp)[0] = cc->fpr[0].d; \
     ((double *)dp)[1] = cc->fpr[1].d; \
     ((double *)dp)[1] = cc->fpr[1].d; \
   }
   }
+#endif
 
 
 #define CCALL_HANDLE_STRUCTARG \
 #define CCALL_HANDLE_STRUCTARG \
   /* Pass all structs by value in registers and/or on the stack. */
   /* Pass all structs by value in registers and/or on the stack. */
@@ -401,6 +446,22 @@
 #define CCALL_HANDLE_COMPLEXARG \
 #define CCALL_HANDLE_COMPLEXARG \
   /* Pass complex by value in 2 or 4 GPRs. */
   /* Pass complex by value in 2 or 4 GPRs. */
 
 
+#define CCALL_HANDLE_GPR \
+  if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
+    ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+  if (ngpr < maxgpr) { \
+    dp = &cc->gpr[ngpr]; \
+    if (ngpr + n > maxgpr) { \
+     nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
+     if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+     ngpr = maxgpr; \
+    } else { \
+     ngpr += n; \
+    } \
+    goto done; \
+  }
+
+#if !LJ_ABI_SOFTFP	/* MIPS32 hard-float */
 #define CCALL_HANDLE_REGARG \
 #define CCALL_HANDLE_REGARG \
   if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
   if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
     /* Try to pass argument in FPRs. */ \
     /* Try to pass argument in FPRs. */ \
@@ -409,24 +470,18 @@
     goto done; \
     goto done; \
   } else {  /* Try to pass argument in GPRs. */ \
   } else {  /* Try to pass argument in GPRs. */ \
     nfpr = CCALL_NARG_FPR; \
     nfpr = CCALL_NARG_FPR; \
-    if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
-      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
-    if (ngpr < maxgpr) { \
-      dp = &cc->gpr[ngpr]; \
-      if (ngpr + n > maxgpr) { \
-	nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-	if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
-	ngpr = maxgpr; \
-      } else { \
-	ngpr += n; \
-      } \
-      goto done; \
-    } \
+    CCALL_HANDLE_GPR \
   }
   }
+#else			/* MIPS32 soft-float */
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#endif
 
 
+#if !LJ_ABI_SOFTFP
+/* On MIPS64 soft-float, position of float return values is endian-dependent. */
 #define CCALL_HANDLE_RET \
 #define CCALL_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     sp = (uint8_t *)&cc->fpr[0].f;
     sp = (uint8_t *)&cc->fpr[0].f;
+#endif
 
 
 #else
 #else
 #error "Missing calling convention definitions for this architecture"
 #error "Missing calling convention definitions for this architecture"
@@ -621,6 +676,52 @@ noth:  /* Not a homogeneous float/double aggregate. */
 
 
 #endif
 #endif
 
 
+/* -- ARM64 ABI struct classification ------------------------------------- */
+
+#if LJ_TARGET_ARM64
+
+/* Classify a struct based on its fields.
+**
+** Returns (element size) + (element count << 8) if the struct is a
+** homogeneous float/double aggregate: all leaf fields are FP or complex
+** values with the same element size (4 or 8) and there are at most 4
+** elements. Nested structs are classified recursively. For unions the
+** largest member count is used instead of the sum. Otherwise returns 1 if
+** the struct size is <= 16 and 0 if not.
+*/
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+  while (ct->sib) {  /* Walk the sibling chain of the struct. */
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	r |= sct->size;  /* Mixed sizes give a value other than 4 or 8. */
+	if (!isu) n++; else if (n == 0) n = 1;
+      } else if (ctype_iscomplex(sct->info)) {
+	r |= (sct->size >> 1);  /* Element size is half the complex size. */
+	if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+	goto substruct;
+      } else {
+	goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {
+	unsigned int s = ccall_classify_struct(cts, sct);
+	if (s <= 1) goto noth;  /* Nested struct is not an HFA itself. */
+	r |= (s & 255);
+	if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)
+    return r + (n << 8);
+noth:  /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 /* -- Common C call handling ---------------------------------------------- */
 
 
 /* Infer the destination CTypeID for a vararg argument. */
 /* Infer the destination CTypeID for a vararg argument. */
@@ -803,6 +904,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
       cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1];  /* Split complex double. */
       cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1];  /* Split complex double. */
       cc->fpr[nfpr-2].d[1] = 0;
       cc->fpr[nfpr-2].d[1] = 0;
     }
     }
+#elif LJ_TARGET_ARM64
+    if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
+      /* Split float HFA or complex float into separate registers. */
+      CTSize i = (sz >> 2) - 1;
+      do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
+    }
 #else
 #else
     UNUSED(isfp);
     UNUSED(isfp);
 #endif
 #endif

+ 20 - 13
Source/ThirdParty/LuaJIT/src/lj_ccall.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI C call handling.
 ** FFI C call handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CCALL_H
 #ifndef _LJ_CCALL_H
@@ -68,34 +68,39 @@ typedef union FPRArg {
   float f[2];
   float f[2];
 } FPRArg;
 } FPRArg;
 
 
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
 
 
 #define CCALL_NARG_GPR		8
 #define CCALL_NARG_GPR		8
+#define CCALL_NRET_GPR		2
 #define CCALL_NARG_FPR		8
 #define CCALL_NARG_FPR		8
-#define CCALL_NRET_GPR		4	/* For complex double. */
-#define CCALL_NRET_FPR		1
-#define CCALL_SPS_EXTRA		4
+#define CCALL_NRET_FPR		4
 #define CCALL_SPS_FREE		0
 #define CCALL_SPS_FREE		0
 
 
 typedef intptr_t GPRArg;
 typedef intptr_t GPRArg;
-typedef double FPRArg;
+typedef union FPRArg {
+  double d;
+  float f;
+  uint32_t u32;
+} FPRArg;
 
 
-#elif LJ_TARGET_PPCSPE
+#elif LJ_TARGET_PPC
 
 
 #define CCALL_NARG_GPR		8
 #define CCALL_NARG_GPR		8
-#define CCALL_NARG_FPR		0
-#define CCALL_NRET_GPR		4	/* For softfp complex double. */
-#define CCALL_NRET_FPR		0
-#define CCALL_SPS_FREE		0	/* NYI */
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_GPR		4	/* For complex double. */
+#define CCALL_NRET_FPR		1
+#define CCALL_SPS_EXTRA		4
+#define CCALL_SPS_FREE		0
 
 
 typedef intptr_t GPRArg;
 typedef intptr_t GPRArg;
+typedef double FPRArg;
 
 
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 
 
 #define CCALL_NARG_GPR		4
 #define CCALL_NARG_GPR		4
-#define CCALL_NARG_FPR		2
+#define CCALL_NARG_FPR		(LJ_ABI_SOFTFP ? 0 : 2)
 #define CCALL_NRET_GPR		2
 #define CCALL_NRET_GPR		2
-#define CCALL_NRET_FPR		2
+#define CCALL_NRET_FPR		(LJ_ABI_SOFTFP ? 0 : 2)
 #define CCALL_SPS_EXTRA		7
 #define CCALL_SPS_EXTRA		7
 #define CCALL_SPS_FREE		1
 #define CCALL_SPS_FREE		1
 
 
@@ -145,6 +150,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   uint8_t nfpr;			/* Number of arguments in FPRs. */
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #elif LJ_TARGET_X86
 #elif LJ_TARGET_X86
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
+#elif LJ_TARGET_ARM64
+  void *retp;			/* Aggregate return pointer in x8. */
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
   uint8_t nfpr;			/* Number of arguments in FPRs. */
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #endif
 #endif

+ 105 - 28
Source/ThirdParty/LuaJIT/src/lj_ccallback.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI C callback handling.
 ** FFI C callback handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -27,7 +27,7 @@
 
 
 #if LJ_OS_NOJIT
 #if LJ_OS_NOJIT
 
 
-/* Disabled callback support. */
+/* Callbacks disabled. */
 #define CALLBACK_SLOT2OFS(slot)	(0*(slot))
 #define CALLBACK_SLOT2OFS(slot)	(0*(slot))
 #define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
 #define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
 #define CALLBACK_MAX_SLOT	0
 #define CALLBACK_MAX_SLOT	0
@@ -35,7 +35,7 @@
 #elif LJ_TARGET_X86ORX64
 #elif LJ_TARGET_X86ORX64
 
 
 #define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
 #define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
-#define CALLBACK_MCODE_GROUP	(-2+1+2+5+(LJ_64 ? 6 : 5))
+#define CALLBACK_MCODE_GROUP	(-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
 
 
 #define CALLBACK_SLOT2OFS(slot) \
 #define CALLBACK_SLOT2OFS(slot) \
   (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
   (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,18 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 #elif LJ_TARGET_ARM
 #elif LJ_TARGET_ARM
 
 
 #define CALLBACK_MCODE_HEAD		32
 #define CALLBACK_MCODE_HEAD		32
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_MCODE_HEAD		32
 
 
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 
 
 #define CALLBACK_MCODE_HEAD		24
 #define CALLBACK_MCODE_HEAD		24
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
 
 
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 
 
 #define CALLBACK_MCODE_HEAD		24
 #define CALLBACK_MCODE_HEAD		24
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
 
 
 #else
 #else
 
 
@@ -81,6 +76,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 
 
 #endif
 #endif
 
 
+#ifndef CALLBACK_SLOT2OFS  /* Default layout: header, then 8 bytes per slot. */
+#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
+#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
+#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#endif
+
 /* Convert callback slot number to callback function pointer. */
 /* Convert callback slot number to callback function pointer. */
 static void *callback_slot2ptr(CTState *cts, MSize slot)
 static void *callback_slot2ptr(CTState *cts, MSize slot)
 {
 {
@@ -119,8 +120,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
       /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
       /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
       *p++ = XI_PUSH + RID_EBP;
       *p++ = XI_PUSH + RID_EBP;
       *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
       *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
+#if LJ_GC64
+      *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
+      *(uint64_t *)p = (uint64_t)(g); p += 8;
+#else
       *p++ = XI_MOVri | RID_EBP;
       *p++ = XI_MOVri | RID_EBP;
       *(int32_t *)p = i32ptr(g); p += 4;
       *(int32_t *)p = i32ptr(g); p += 4;
+#endif
 #if LJ_64
 #if LJ_64
       /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
       /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
       *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
       *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
@@ -157,6 +163,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
   }
   }
   lua_assert(p - page <= CALLBACK_MCODE_SIZE);
   lua_assert(p - page <= CALLBACK_MCODE_SIZE);
 }
 }
+#elif LJ_TARGET_ARM64
+static void callback_mcode_init(global_State *g, uint32_t *page)
+{
+  uint32_t *p = page;
+  void *target = (void *)lj_vm_ffi_callback;
+  MSize slot;
+  *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4);  /* x11 = target (literal below, assuming word offsets). */
+  *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5);  /* x10 = g (literal below). */
+  *p++ = A64I_BR | A64F_N(RID_X11);  /* Jump to the address in x11. */
+  *p++ = A64I_NOP;  /* Fill the 4th instruction slot of the header. */
+  ((void **)p)[0] = target;  /* Literals read by the two loads above. */
+  ((void **)p)[1] = g;
+  p += 4;  /* Skip the two pointer literals. */
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {  /* Two instructions per slot. */
+    *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot);  /* w9 = slot number. */
+    *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu);  /* Branch back to the header at page. */
+    p++;
+  }
+  lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+}
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 static void callback_mcode_init(global_State *g, uint32_t *page)
 static void callback_mcode_init(global_State *g, uint32_t *page)
 {
 {
@@ -351,6 +377,29 @@ void lj_ccallback_mcode_free(CTState *cts)
     goto done; \
     goto done; \
   } CALLBACK_HANDLE_REGARG_FP2
   } CALLBACK_HANDLE_REGARG_FP2
 
 
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) {  /* Try to fetch argument from the saved FPRs. */ \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } else { \
+      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
+    } \
+  } else {  /* Try to fetch argument from the saved GPRs. */ \
+    if (!LJ_TARGET_IOS && n > 1) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } else { \
+      ngpr = CCALL_NARG_GPR;  /* Prevent reordering. */ \
+    } \
+  }
+
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 
 
 #define CALLBACK_HANDLE_REGARG \
 #define CALLBACK_HANDLE_REGARG \
@@ -378,6 +427,15 @@ void lj_ccallback_mcode_free(CTState *cts)
 
 
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
 
 
+#define CALLBACK_HANDLE_GPR \
+  if (n > 1) ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+  if (ngpr + n <= maxgpr) { \
+    sp = &cts->cb.gpr[ngpr]; \
+    ngpr += n; \
+    goto done; \
+  }
+
+#if !LJ_ABI_SOFTFP	/* MIPS32 hard-float */
 #define CALLBACK_HANDLE_REGARG \
 #define CALLBACK_HANDLE_REGARG \
   if (isfp && nfpr < CCALL_NARG_FPR) {  /* Try to pass argument in FPRs. */ \
   if (isfp && nfpr < CCALL_NARG_FPR) {  /* Try to pass argument in FPRs. */ \
     sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
     sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -385,13 +443,13 @@ void lj_ccallback_mcode_free(CTState *cts)
     goto done; \
     goto done; \
   } else {  /* Try to pass argument in GPRs. */ \
   } else {  /* Try to pass argument in GPRs. */ \
     nfpr = CCALL_NARG_FPR; \
     nfpr = CCALL_NARG_FPR; \
-    if (n > 1) ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
-    if (ngpr + n <= maxgpr) { \
-      sp = &cts->cb.gpr[ngpr]; \
-      ngpr += n; \
-      goto done; \
-    } \
+    CALLBACK_HANDLE_GPR \
   }
   }
+#else			/* MIPS32 soft-float */
+#define CALLBACK_HANDLE_REGARG \
+  CALLBACK_HANDLE_GPR \
+  UNUSED(isfp);
+#endif
 
 
 #define CALLBACK_HANDLE_RET \
 #define CALLBACK_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -411,6 +469,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
   int gcsteps = 0;
   int gcsteps = 0;
   CType *ct;
   CType *ct;
   GCfunc *fn;
   GCfunc *fn;
+  int fntp;
   MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
   MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
 #if CCALL_NARG_FPR
 #if CCALL_NARG_FPR
   MSize nfpr = 0;
   MSize nfpr = 0;
@@ -421,18 +480,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
 
 
   if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
   if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
     ct = ctype_get(cts, id);
     ct = ctype_get(cts, id);
-    rid = ctype_cid(ct->info);
+    rid = ctype_cid(ct->info);  /* Return type. x86: +(spadj<<16). */
     fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
     fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
+    fntp = LJ_TFUNC;
   } else {  /* Must set up frame first, before throwing the error. */
   } else {  /* Must set up frame first, before throwing the error. */
     ct = NULL;
     ct = NULL;
     rid = 0;
     rid = 0;
     fn = (GCfunc *)L;
     fn = (GCfunc *)L;
+    fntp = LJ_TTHREAD;
   }
   }
-  o->u32.lo = LJ_CONT_FFI_CALLBACK;  /* Continuation returns from callback. */
-  o->u32.hi = rid;  /* Return type. x86: +(spadj<<16). */
-  o++;
-  setframe_gc(o, obj2gco(fn));
-  setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+  /* Continuation returns from callback. */
+  if (LJ_FR2) {
+    (o++)->u64 = LJ_CONT_FFI_CALLBACK;
+    (o++)->u64 = rid;
+    o++;
+  } else {
+    o->u32.lo = LJ_CONT_FFI_CALLBACK;
+    o->u32.hi = rid;
+    o++;
+  }
+  setframe_gc(o, obj2gco(fn), fntp);
+  setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
   L->top = L->base = ++o;
   L->top = L->base = ++o;
   if (!ct)
   if (!ct)
     lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
     lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -483,8 +551,13 @@ static void callback_conv_args(CTState *cts, lua_State *L)
   L->top = o;
   L->top = o;
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
   /* Store stack adjustment for returns from non-cdecl callbacks. */
   /* Store stack adjustment for returns from non-cdecl callbacks. */
-  if (ctype_cconv(ct->info) != CTCC_CDECL)
+  if (ctype_cconv(ct->info) != CTCC_CDECL) {
+#if LJ_FR2
+    (L->base-3)->u64 |= (nsp << (16+2));
+#else
     (L->base-2)->u32.hi |= (nsp << (16+2));
     (L->base-2)->u32.hi |= (nsp << (16+2));
+#endif
+  }
 #endif
 #endif
   while (gcsteps-- > 0)
   while (gcsteps-- > 0)
     lj_gc_check(L);
     lj_gc_check(L);
@@ -493,7 +566,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
 /* Convert Lua object to callback result. */
 /* Convert Lua object to callback result. */
 static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 {
 {
+#if LJ_FR2
+  CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
+#else
   CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
   CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
+#endif
 #if LJ_TARGET_X86
 #if LJ_TARGET_X86
   cts->cb.gpr[2] = 0;
   cts->cb.gpr[2] = 0;
 #endif
 #endif
@@ -529,7 +606,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
   lua_State *L = cts->L;
   lua_State *L = cts->L;
   global_State *g = cts->g;
   global_State *g = cts->g;
   lua_assert(L != NULL);
   lua_assert(L != NULL);
-  if (gcref(g->jit_L)) {
+  if (tvref(g->jit_base)) {
     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
     if (g->panic) g->panic(L);
     if (g->panic) g->panic(L);
     exit(EXIT_FAILURE);
     exit(EXIT_FAILURE);
@@ -562,9 +639,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
   }
   }
   callback_conv_result(cts, L, o);
   callback_conv_result(cts, L, o);
   /* Finally drop C frame and continuation frame. */
   /* Finally drop C frame and continuation frame. */
-  L->cframe = cframe_prev(L->cframe);
-  L->top -= 2;
+  L->top -= 2+2*LJ_FR2;
   L->base = obase;
   L->base = obase;
+  L->cframe = cframe_prev(L->cframe);
   cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
   cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
 }
 }
 
 

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_ccallback.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI C callback handling.
 ** FFI C callback handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CCALLBACK_H
 #ifndef _LJ_CCALLBACK_H

+ 2 - 1
Source/ThirdParty/LuaJIT/src/lj_cconv.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** C type conversions.
 ** C type conversions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -702,6 +702,7 @@ static void cconv_substruct_init(CTState *cts, CType *d, uint8_t *dp,
     } else if (ctype_isxattrib(df->info, CTA_SUBTYPE)) {
     } else if (ctype_isxattrib(df->info, CTA_SUBTYPE)) {
       cconv_substruct_init(cts, ctype_rawchild(cts, df),
       cconv_substruct_init(cts, ctype_rawchild(cts, df),
 			   dp+df->size, o, len, ip);
 			   dp+df->size, o, len, ip);
+      if ((d->info & CTF_UNION)) break;
     }  /* Ignore all other entries in the chain. */
     }  /* Ignore all other entries in the chain. */
   }
   }
 }
 }

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_cconv.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** C type conversions.
 ** C type conversions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CCONV_H
 #ifndef _LJ_CCONV_H

+ 35 - 23
Source/ThirdParty/LuaJIT/src/lj_cdata.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** C data management.
 ** C data management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -9,7 +9,6 @@
 
 
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
 }
 }
 
 
 /* Allocate variable-sized or specially aligned C data object. */
 /* Allocate variable-sized or specially aligned C data object. */
-GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
+GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
 {
 {
   global_State *g;
   global_State *g;
   MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
   MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
 		(align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
 		(align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
-  char *p = lj_mem_newt(cts->L, extra + sz, char);
+  char *p = lj_mem_newt(L, extra + sz, char);
   uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
   uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
   uintptr_t almask = (1u << align) - 1u;
   uintptr_t almask = (1u << align) - 1u;
   GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
   GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
   cdatav(cd)->offset = (uint16_t)((char *)cd - p);
   cdatav(cd)->offset = (uint16_t)((char *)cd - p);
   cdatav(cd)->extra = extra;
   cdatav(cd)->extra = extra;
   cdatav(cd)->len = sz;
   cdatav(cd)->len = sz;
-  g = cts->g;
+  g = G(L);
   setgcrefr(cd->nextgc, g->gc.root);
   setgcrefr(cd->nextgc, g->gc.root);
   setgcref(g->gc.root, obj2gco(cd));
   setgcref(g->gc.root, obj2gco(cd));
   newwhite(g, obj2gco(cd));
   newwhite(g, obj2gco(cd));
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
   return cd;
   return cd;
 }
 }
 
 
+/* Allocate arbitrary C data object. */
+GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
+{
+  if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
+    return lj_cdata_new(cts, id, sz);
+  else
+    return lj_cdata_newv(cts->L, id, sz, ctype_align(info));
+}
+
 /* Free a C data object. */
 /* Free a C data object. */
 void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
 void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
 {
 {
@@ -76,21 +84,20 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
   }
   }
 }
 }
 
 
-TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd)
+void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
 {
 {
-  global_State *g = G(L);
-  GCtab *t = ctype_ctsG(g)->finalizer;
+  GCtab *t = ctype_ctsG(G(L))->finalizer;
   if (gcref(t->metatable)) {
   if (gcref(t->metatable)) {
     /* Add cdata to finalizer table, if still enabled. */
     /* Add cdata to finalizer table, if still enabled. */
     TValue *tv, tmp;
     TValue *tv, tmp;
     setcdataV(L, &tmp, cd);
     setcdataV(L, &tmp, cd);
     lj_gc_anybarriert(L, t);
     lj_gc_anybarriert(L, t);
     tv = lj_tab_set(L, t, &tmp);
     tv = lj_tab_set(L, t, &tmp);
-    cd->marked |= LJ_GC_CDATA_FIN;
-    return tv;
-  } else {
-    /* Otherwise return dummy TValue. */
-    return &g->tmptv;
+    setgcV(L, tv, obj, it);
+    if (!tvisnil(tv))
+      cd->marked |= LJ_GC_CDATA_FIN;
+    else
+      cd->marked &= ~LJ_GC_CDATA_FIN;
   }
   }
 }
 }
 
 
@@ -123,20 +130,25 @@ collect_attrib:
     idx = (ptrdiff_t)intV(key);
     idx = (ptrdiff_t)intV(key);
     goto integer_key;
     goto integer_key;
   } else if (tvisnum(key)) {  /* Numeric key. */
   } else if (tvisnum(key)) {  /* Numeric key. */
-    idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key));
+#ifdef _MSC_VER
+    /* Workaround for MSVC bug. */
+    volatile
+#endif
+    lua_Number n = numV(key);
+    idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
   integer_key:
   integer_key:
     if (ctype_ispointer(ct->info)) {
     if (ctype_ispointer(ct->info)) {
       CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info));  /* Element size. */
       CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info));  /* Element size. */
-      if (sz != CTSIZE_INVALID) {
-	if (ctype_isptr(ct->info)) {
-	  p = (uint8_t *)cdata_getptr(p, ct->size);
-	} else if ((ct->info & (CTF_VECTOR|CTF_COMPLEX))) {
-	  if ((ct->info & CTF_COMPLEX)) idx &= 1;
-	  *qual |= CTF_CONST;  /* Valarray elements are constant. */
-	}
-	*pp = p + idx*(int32_t)sz;
-	return ct;
+      if (sz == CTSIZE_INVALID)
+	lj_err_caller(cts->L, LJ_ERR_FFI_INVSIZE);
+      if (ctype_isptr(ct->info)) {
+	p = (uint8_t *)cdata_getptr(p, ct->size);
+      } else if ((ct->info & (CTF_VECTOR|CTF_COMPLEX))) {
+	if ((ct->info & CTF_COMPLEX)) idx &= 1;
+	*qual |= CTF_CONST;  /* Valarray elements are constant. */
       }
       }
+      *pp = p + idx*(int32_t)sz;
+      return ct;
     }
     }
   } else if (tviscdata(key)) {  /* Integer cdata key. */
   } else if (tviscdata(key)) {  /* Integer cdata key. */
     GCcdata *cdk = cdataV(key);
     GCcdata *cdk = cdataV(key);

+ 6 - 3
Source/ThirdParty/LuaJIT/src/lj_cdata.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** C data management.
 ** C data management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CDATA_H
 #ifndef _LJ_CDATA_H
@@ -58,11 +58,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
 }
 }
 
 
 LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
 LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
-LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz,
+LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
 			       CTSize align);
 			       CTSize align);
+LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz,
+			       CTInfo info);
 
 
 LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
 LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
-LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd);
+LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
+			     uint32_t it);
 
 
 LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
 LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
 			      uint8_t **pp, CTInfo *qual);
 			      uint8_t **pp, CTInfo *qual);

+ 20 - 11
Source/ThirdParty/LuaJIT/src/lj_clib.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI C library loader.
 ** FFI C library loader.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -16,6 +16,7 @@
 #include "lj_cconv.h"
 #include "lj_cconv.h"
 #include "lj_cdata.h"
 #include "lj_cdata.h"
 #include "lj_clib.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 
 
 /* -- OS-specific functions ----------------------------------------------- */
 /* -- OS-specific functions ----------------------------------------------- */
 
 
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
 #endif
 #endif
      ) {
      ) {
     if (!strchr(name, '.')) {
     if (!strchr(name, '.')) {
-      name = lj_str_pushf(L, CLIB_SOEXT, name);
+      name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
       L->top--;
       L->top--;
 #ifdef __CYGWIN__
 #ifdef __CYGWIN__
     } else {
     } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
     }
     }
     if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
     if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
 	  name[2] == CLIB_SOPREFIX[2])) {
 	  name[2] == CLIB_SOPREFIX[2])) {
-      name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name);
+      name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
       L->top--;
       L->top--;
     }
     }
   }
   }
@@ -171,11 +172,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
 					    const char *name)
 					    const char *name)
 {
 {
   DWORD err = GetLastError();
   DWORD err = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuf[128];
+  char buf[128*2];
+  if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
+		      NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) ||
+      !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL))
+#else
   char buf[128];
   char buf[128];
   if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
   if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
 		      NULL, err, 0, buf, sizeof(buf), NULL))
 		      NULL, err, 0, buf, sizeof(buf), NULL))
+#endif
     buf[0] = '\0';
     buf[0] = '\0';
-  lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf));
+  lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
 }
 }
 
 
 static int clib_needext(const char *s)
 static int clib_needext(const char *s)
@@ -190,7 +199,7 @@ static int clib_needext(const char *s)
 static const char *clib_extname(lua_State *L, const char *name)
 static const char *clib_extname(lua_State *L, const char *name)
 {
 {
   if (clib_needext(name)) {
   if (clib_needext(name)) {
-    name = lj_str_pushf(L, "%s.dll", name);
+    name = lj_strfmt_pushf(L, "%s.dll", name);
     L->top--;
     L->top--;
   }
   }
   return name;
   return name;
@@ -199,7 +208,7 @@ static const char *clib_extname(lua_State *L, const char *name)
 static void *clib_loadlib(lua_State *L, const char *name, int global)
 static void *clib_loadlib(lua_State *L, const char *name, int global)
 {
 {
   DWORD oldwerr = GetLastError();
   DWORD oldwerr = GetLastError();
-  void *h = (void *)LoadLibraryA(clib_extname(L, name));
+  void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0);
   if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
   if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
   SetLastError(oldwerr);
   SetLastError(oldwerr);
   UNUSED(global);
   UNUSED(global);
@@ -240,9 +249,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
 	  GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
 	  GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
 			     (const char *)&_fmode, &h);
 			     (const char *)&_fmode, &h);
 	  break;
 	  break;
-	case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break;
-	case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break;
-	case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break;
+	case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break;
+	case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break;
+	case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break;
 	}
 	}
 	if (!h) continue;
 	if (!h) continue;
 	clib_def_handle[i] = (void *)h;
 	clib_def_handle[i] = (void *)h;
@@ -263,7 +272,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
 LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
 LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
 					    const char *name)
 					    const char *name)
 {
 {
-  lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS"));
+  lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
 }
 }
 
 
 static void *clib_loadlib(lua_State *L, const char *name, int global)
 static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -347,7 +356,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
 	CTInfo cconv = ctype_cconv(ct->info);
 	CTInfo cconv = ctype_cconv(ct->info);
 	if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
 	if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
 	  CTSize sz = clib_func_argsize(cts, ct);
 	  CTSize sz = clib_func_argsize(cts, ct);
-	  const char *symd = lj_str_pushf(L,
+	  const char *symd = lj_strfmt_pushf(L,
 			       cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
 			       cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
 			       sym, sz);
 			       sym, sz);
 	  L->top--;
 	  L->top--;

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_clib.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** FFI C library loader.
 ** FFI C library loader.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CLIB_H
 #ifndef _LJ_CLIB_H

+ 24 - 34
Source/ThirdParty/LuaJIT/src/lj_cparse.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** C declaration parser.
 ** C declaration parser.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -9,13 +9,14 @@
 
 
 #include "lj_gc.h"
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cparse.h"
 #include "lj_cparse.h"
 #include "lj_frame.h"
 #include "lj_frame.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_char.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 
 /*
 /*
 ** Important note: this is NOT a validating C parser! This is a minimal
 ** Important note: this is NOT a validating C parser! This is a minimal
@@ -46,9 +47,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok)
   if (tok > CTOK_OFS)
   if (tok > CTOK_OFS)
     return ctoknames[tok-CTOK_OFS-1];
     return ctoknames[tok-CTOK_OFS-1];
   else if (!lj_char_iscntrl(tok))
   else if (!lj_char_iscntrl(tok))
-    return lj_str_pushf(cp->L, "%c", tok);
+    return lj_strfmt_pushf(cp->L, "%c", tok);
   else
   else
-    return lj_str_pushf(cp->L, "char(%d)", tok);
+    return lj_strfmt_pushf(cp->L, "char(%d)", tok);
 }
 }
 
 
 /* End-of-line? */
 /* End-of-line? */
@@ -85,24 +86,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
   return cp_get(cp);
   return cp_get(cp);
 }
 }
 
 
-/* Grow save buffer. */
-static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
-{
-  MSize newsize;
-  if (cp->sb.sz >= CPARSE_MAX_BUF/2)
-    cp_err(cp, LJ_ERR_XELEM);
-  newsize = cp->sb.sz * 2;
-  lj_str_resizebuf(cp->L, &cp->sb, newsize);
-  cp->sb.buf[cp->sb.n++] = (char)c;
-}
-
 /* Save character in buffer. */
 /* Save character in buffer. */
 static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
 static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
 {
 {
-  if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz))
-    cp_save_grow(cp, c);
-  else
-    cp->sb.buf[cp->sb.n++] = (char)c;
+  lj_buf_putb(&cp->sb, c);
 }
 }
 
 
 /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
 /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +109,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
     tokstr = NULL;
     tokstr = NULL;
   } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
   } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
 	     tok >= CTOK_FIRSTDECL) {
 	     tok >= CTOK_FIRSTDECL) {
-    if (cp->sb.n == 0) cp_save(cp, '$');
+    if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
     cp_save(cp, '\0');
     cp_save(cp, '\0');
-    tokstr = cp->sb.buf;
+    tokstr = sbufB(&cp->sb);
   } else {
   } else {
     tokstr = cp_tok2str(cp, tok);
     tokstr = cp_tok2str(cp, tok);
   }
   }
   L = cp->L;
   L = cp->L;
   va_start(argp, em);
   va_start(argp, em);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   va_end(argp);
   if (tokstr)
   if (tokstr)
-    msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
+    msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
   if (cp->linenumber > 1)
   if (cp->linenumber > 1)
-    msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber);
+    msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
   lj_err_callermsg(L, msg);
   lj_err_callermsg(L, msg);
 }
 }
 
 
@@ -164,7 +151,7 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C);
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
   else if (!(cp->mode & CPARSE_MODE_SKIP))
   else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +164,7 @@ static CPToken cp_number(CPState *cp)
 static CPToken cp_ident(CPState *cp)
 static CPToken cp_ident(CPState *cp)
 {
 {
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
-  cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n);
+  cp->str = lj_buf_str(cp->L, &cp->sb);
   cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
   cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
   if (ctype_type(cp->ct->info) == CT_KW)
   if (ctype_type(cp->ct->info) == CT_KW)
     return ctype_cid(cp->ct->info);
     return ctype_cid(cp->ct->info);
@@ -263,11 +250,11 @@ static CPToken cp_string(CPState *cp)
   }
   }
   cp_get(cp);
   cp_get(cp);
   if (delim == '"') {
   if (delim == '"') {
-    cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n);
+    cp->str = lj_buf_str(cp->L, &cp->sb);
     return CTOK_STRING;
     return CTOK_STRING;
   } else {
   } else {
-    if (cp->sb.n != 1) cp_err_token(cp, '\'');
-    cp->val.i32 = (int32_t)(char)cp->sb.buf[0];
+    if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
+    cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
     cp->val.id = CTID_INT32;
     cp->val.id = CTID_INT32;
     return CTOK_INTEGER;
     return CTOK_INTEGER;
   }
   }
@@ -296,7 +283,7 @@ static void cp_comment_cpp(CPState *cp)
 /* Lexical scanner for C. Only a minimal subset is implemented. */
 /* Lexical scanner for C. Only a minimal subset is implemented. */
 static CPToken cp_next_(CPState *cp)
 static CPToken cp_next_(CPState *cp)
 {
 {
-  lj_str_resetbuf(&cp->sb);
+  lj_buf_reset(&cp->sb);
   for (;;) {
   for (;;) {
     if (lj_char_isident(cp->c))
     if (lj_char_isident(cp->c))
       return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
       return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -380,8 +367,7 @@ static void cp_init(CPState *cp)
   cp->depth = 0;
   cp->depth = 0;
   cp->curpack = 0;
   cp->curpack = 0;
   cp->packstack[0] = 255;
   cp->packstack[0] = 255;
-  lj_str_initbuf(&cp->sb);
-  lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
+  lj_buf_init(cp->L, &cp->sb);
   lua_assert(cp->p != NULL);
   lua_assert(cp->p != NULL);
   cp_get(cp);  /* Read-ahead first char. */
   cp_get(cp);  /* Read-ahead first char. */
   cp->tok = 0;
   cp->tok = 0;
@@ -393,7 +379,7 @@ static void cp_init(CPState *cp)
 static void cp_cleanup(CPState *cp)
 static void cp_cleanup(CPState *cp)
 {
 {
   global_State *g = G(cp->L);
   global_State *g = G(cp->L);
-  lj_str_freebuf(g, &cp->sb);
+  lj_buf_free(g, &cp->sb);
 }
 }
 
 
 /* Check and consume optional token. */
 /* Check and consume optional token. */
@@ -798,6 +784,10 @@ static void cp_push_type(CPDecl *decl, CTypeID id)
     cp_push(decl, info & ~CTMASK_CID, size);  /* Copy type. */
     cp_push(decl, info & ~CTMASK_CID, size);  /* Copy type. */
     break;
     break;
   case CT_ARRAY:
   case CT_ARRAY:
+    if ((ct->info & (CTF_VECTOR|CTF_COMPLEX))) {
+      info |= (decl->attr & CTF_QUAL);
+      decl->attr &= ~CTF_QUAL;
+    }
     cp_push_type(decl, ctype_cid(info));  /* Unroll. */
     cp_push_type(decl, ctype_cid(info));  /* Unroll. */
     cp_push(decl, info & ~CTMASK_CID, size);  /* Copy type. */
     cp_push(decl, info & ~CTMASK_CID, size);  /* Copy type. */
     decl->stack[decl->pos].sib = 1;  /* Mark as already checked and sized. */
     decl->stack[decl->pos].sib = 1;  /* Mark as already checked and sized. */
@@ -1012,7 +1002,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
   if (cp->tok == CTOK_STRING) {
   if (cp->tok == CTOK_STRING) {
     GCstr *str = cp->str;
     GCstr *str = cp->str;
     while (cp_next(cp) == CTOK_STRING) {
     while (cp_next(cp) == CTOK_STRING) {
-      lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
+      lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
       cp->L->top--;
       cp->L->top--;
       str = strV(cp->L->top);
       str = strV(cp->L->top);
     }
     }

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_cparse.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** C declaration parser.
 ** C declaration parser.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CPARSE_H
 #ifndef _LJ_CPARSE_H

+ 249 - 68
Source/ThirdParty/LuaJIT/src/lj_crecord.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Trace recorder for C data operations.
 ** Trace recorder for C data operations.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_ffrecord_c
 #define lj_ffrecord_c
@@ -11,13 +11,13 @@
 #if LJ_HASJIT && LJ_HASFFI
 #if LJ_HASJIT && LJ_HASFFI
 
 
 #include "lj_err.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
 #include "lj_frame.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_cdata.h"
 #include "lj_cdata.h"
 #include "lj_cparse.h"
 #include "lj_cparse.h"
 #include "lj_cconv.h"
 #include "lj_cconv.h"
+#include "lj_carith.h"
 #include "lj_clib.h"
 #include "lj_clib.h"
 #include "lj_ccall.h"
 #include "lj_ccall.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
@@ -31,6 +31,7 @@
 #include "lj_snap.h"
 #include "lj_snap.h"
 #include "lj_crecord.h"
 #include "lj_crecord.h"
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
+#include "lj_strfmt.h"
 
 
 /* Some local macros to save typing. Undef'd at the end. */
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     /* fallthrough */
     /* fallthrough */
   case CCX(I, F):
   case CCX(I, F):
     if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
     if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
-    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY);
+    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
     goto xstore;
     goto xstore;
   case CCX(I, P):
   case CCX(I, P):
   case CCX(I, A):
   case CCX(I, A):
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     if (st == IRT_CDATA) goto err_nyi;
     if (st == IRT_CDATA) goto err_nyi;
     /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
     /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
     sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
     sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
-		  st, IRCONV_TRUNC|IRCONV_ANY);
+		  st, IRCONV_ANY);
     goto xstore;
     goto xstore;
 
 
   /* Destination is an array. */
   /* Destination is an array. */
@@ -640,12 +641,23 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
       sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
       sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
       sid = CTID_A_CCHAR;
       sid = CTID_A_CCHAR;
     }
     }
-  } else {  /* NYI: tref_istab(sp), tref_islightud(sp). */
+  } else if (tref_islightud(sp)) {
+#if LJ_64
+    sp = emitir(IRT(IR_BAND, IRT_P64), sp,
+		lj_ir_kint64(J, U64x(00007fff,ffffffff)));
+#endif
+  } else {  /* NYI: tref_istab(sp). */
     IRType t;
     IRType t;
     sid = argv2cdata(J, sp, sval)->ctypeid;
     sid = argv2cdata(J, sp, sval)->ctypeid;
     s = ctype_raw(cts, sid);
     s = ctype_raw(cts, sid);
     svisnz = cdataptr(cdataV(sval));
     svisnz = cdataptr(cdataV(sval));
-    t = crec_ct2irt(cts, s);
+    if (ctype_isfunc(s->info)) {
+      sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
+      s = ctype_get(cts, sid);
+      t = IRT_PTR;
+    } else {
+      t = crec_ct2irt(cts, s);
+    }
     if (ctype_isptr(s->info)) {
     if (ctype_isptr(s->info)) {
       sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
       sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
       if (ctype_isref(s->info)) {
       if (ctype_isref(s->info)) {
@@ -794,7 +806,7 @@ again:
     }
     }
   } else if (tref_isstr(idx)) {
   } else if (tref_isstr(idx)) {
     GCstr *name = strV(&rd->argv[1]);
     GCstr *name = strV(&rd->argv[1]);
-    if (cd->ctypeid == CTID_CTYPEID)
+    if (cd && cd->ctypeid == CTID_CTYPEID)
       ct = ctype_raw(cts, crec_constructor(J, cd, ptr));
       ct = ctype_raw(cts, crec_constructor(J, cd, ptr));
     if (ctype_isstruct(ct->info)) {
     if (ctype_isstruct(ct->info)) {
       CTSize fofs;
       CTSize fofs;
@@ -835,6 +847,7 @@ again:
       CType *cct = ctype_rawchild(cts, ct);
       CType *cct = ctype_rawchild(cts, ct);
       if (ctype_isstruct(cct->info)) {
       if (ctype_isstruct(cct->info)) {
 	ct = cct;
 	ct = cct;
+	cd = NULL;
 	if (tref_isstr(idx)) goto again;
 	if (tref_isstr(idx)) goto again;
       }
       }
     }
     }
@@ -847,8 +860,11 @@ again:
 
 
   /* Resolve reference for field. */
   /* Resolve reference for field. */
   ct = ctype_get(cts, sid);
   ct = ctype_get(cts, sid);
-  if (ctype_isref(ct->info))
+  if (ctype_isref(ct->info)) {
     ptr = emitir(IRT(IR_XLOAD, IRT_PTR), ptr, 0);
     ptr = emitir(IRT(IR_XLOAD, IRT_PTR), ptr, 0);
+    sid = ctype_cid(ct->info);
+    ct = ctype_get(cts, sid);
+  }
 
 
   while (ctype_isattrib(ct->info))
   while (ctype_isattrib(ct->info))
     ct = ctype_child(cts, ct);  /* Skip attributes. */
     ct = ctype_child(cts, ct);  /* Skip attributes. */
@@ -863,21 +879,17 @@ again:
 }
 }
 
 
 /* Record setting a finalizer. */
 /* Record setting a finalizer. */
-static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin)
+static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
 {
 {
-  TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd);
-  TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4));
-  if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; }
-  if (tvisfunc(fin)) {
-    emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
-    emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
-  } else if (tviscdata(fin)) {
-    emitir(IRT(IR_XSTORE, IRT_P32), trlo,
-	   lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
-    emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
+  if (tvisgcv(fin)) {
+    if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
+  } else if (tvisnil(fin)) {
+    trfin = lj_ir_kptr(J, NULL);
   } else {
   } else {
     lj_trace_err(J, LJ_TRERR_BADTYPE);
     lj_trace_err(J, LJ_TRERR_BADTYPE);
   }
   }
+  lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
+	     trfin, lj_ir_kint(J, (int32_t)itype(fin)));
   J->needsnap = 1;
   J->needsnap = 1;
 }
 }
 
 
@@ -888,10 +900,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
   CTSize sz;
   CTSize sz;
   CTInfo info = lj_ctype_info(cts, id, &sz);
   CTInfo info = lj_ctype_info(cts, id, &sz);
   CType *d = ctype_raw(cts, id);
   CType *d = ctype_raw(cts, id);
-  TRef trid;
-  if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN)
-    lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: large/special allocations. */
-  trid = lj_ir_kint(J, id);
+  TRef trcd, trid = lj_ir_kint(J, id);
+  cTValue *fin;
   /* Use special instruction to box pointer or 32/64 bit integer. */
   /* Use special instruction to box pointer or 32/64 bit integer. */
   if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
   if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
     TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
     TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -899,11 +909,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
 	      sz == 4 ? lj_ir_kint(J, 0) :
 	      sz == 4 ? lj_ir_kint(J, 0) :
 	      (lj_needsplit(J), lj_ir_kint64(J, 0));
 	      (lj_needsplit(J), lj_ir_kint64(J, 0));
     J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
     J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
+    return;
   } else {
   } else {
-    TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL);
-    cTValue *fin;
-    J->base[0] = trcd;
-    if (J->base[1] && !J->base[2] &&
+    TRef trsz = TREF_NIL;
+    if ((info & CTF_VLA)) {  /* Calculate VLA/VLS size at runtime. */
+      CTSize sz0, sz1;
+      if (!J->base[1] || J->base[2])
+	lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: init VLA/VLS. */
+      trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
+			J->base[1], &rd->argv[1]);
+      sz0 = lj_ctype_vlsize(cts, d, 0);
+      sz1 = lj_ctype_vlsize(cts, d, 1);
+      trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
+      trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
+      J->base[1] = 0;  /* Simplify logic below. */
+    } else if (ctype_align(info) > CT_MEMALIGN) {
+      trsz = lj_ir_kint(J, sz);
+    }
+    trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
+    if (sz > 128 || (info & CTF_VLA)) {
+      TRef dp;
+      CTSize align;
+    special:  /* Only handle bulk zero-fill for large/VLA/VLS types. */
+      if (J->base[1])
+	lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: init large/VLA/VLS types. */
+      dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
+      if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
+      align = ctype_align(info);
+      if (align < CT_MEMALIGN) align = CT_MEMALIGN;
+      crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
+    } else if (J->base[1] && !J->base[2] &&
 	!lj_cconv_multi_init(cts, d, &rd->argv[1])) {
 	!lj_cconv_multi_init(cts, d, &rd->argv[1])) {
       goto single_init;
       goto single_init;
     } else if (ctype_isarray(d->info)) {
     } else if (ctype_isarray(d->info)) {
@@ -914,8 +949,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
       TValue *sval = &tv;
       TValue *sval = &tv;
       MSize i;
       MSize i;
       tv.u64 = 0;
       tv.u64 = 0;
-      if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))
-	lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: init array of aggregates. */
+      if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
+	  esize * CREC_FILL_MAXUNROLL < sz)
+	goto special;
       for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
       for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
 	TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
 	TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
 			 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
 			 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -972,11 +1008,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
 	crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
 	crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
       }
       }
     }
     }
-    /* Handle __gc metamethod. */
-    fin = lj_ctype_meta(cts, id, MM_gc);
-    if (fin)
-      crec_finalizer(J, trcd, fin);
   }
   }
+  J->base[0] = trcd;
+  /* Handle __gc metamethod. */
+  fin = lj_ctype_meta(cts, id, MM_gc);
+  if (fin)
+    crec_finalizer(J, trcd, 0, fin);
 }
 }
 
 
 /* Record argument conversions. */
 /* Record argument conversions. */
@@ -1086,7 +1123,7 @@ static void crec_snap_caller(jit_State *J)
   ptrdiff_t delta;
   ptrdiff_t delta;
   if (!frame_islua(base-1) || J->framedepth <= 0)
   if (!frame_islua(base-1) || J->framedepth <= 0)
     lj_trace_err(J, LJ_TRERR_NYICALL);
     lj_trace_err(J, LJ_TRERR_NYICALL);
-  J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]);
+  J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
   L->top = base; L->base = base - delta;
   L->top = base; L->base = base - delta;
   J->base[-1] = TREF_FALSE;
   J->base[-1] = TREF_FALSE;
   J->base -= delta; J->baseslot -= (BCReg)delta;
   J->base -= delta; J->baseslot -= (BCReg)delta;
@@ -1229,7 +1266,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
     for (i = 0; i < 2; i++) {
     for (i = 0; i < 2; i++) {
       IRType st = tref_type(sp[i]);
       IRType st = tref_type(sp[i]);
       if (st == IRT_NUM || st == IRT_FLOAT)
       if (st == IRT_NUM || st == IRT_FLOAT)
-	sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY);
+	sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
       else if (!(st == IRT_I64 || st == IRT_U64))
       else if (!(st == IRT_I64 || st == IRT_U64))
 	sp[i] = emitconv(sp[i], dt, IRT_INT,
 	sp[i] = emitconv(sp[i], dt, IRT_INT,
 			 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
 			 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1297,15 +1334,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
     CTypeID id;
     CTypeID id;
 #if LJ_64
 #if LJ_64
     if (t == IRT_NUM || t == IRT_FLOAT)
     if (t == IRT_NUM || t == IRT_FLOAT)
-      tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY);
+      tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
     else if (!(t == IRT_I64 || t == IRT_U64))
     else if (!(t == IRT_I64 || t == IRT_U64))
       tr = emitconv(tr, IRT_INTP, IRT_INT,
       tr = emitconv(tr, IRT_INTP, IRT_INT,
 		    ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
 		    ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
 #else
 #else
     if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
     if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
       tr = emitconv(tr, IRT_INTP, t,
       tr = emitconv(tr, IRT_INTP, t,
-		    (t == IRT_NUM || t == IRT_FLOAT) ?
-		    IRCONV_TRUNC|IRCONV_ANY : 0);
+		    (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
     }
     }
 #endif
 #endif
     tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
     tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1317,7 +1353,8 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
 }
 }
 
 
 /* Record ctype arithmetic metamethods. */
 /* Record ctype arithmetic metamethods. */
-static void crec_arith_meta(jit_State *J, CTState *cts, RecordFFData *rd)
+static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
+			    RecordFFData *rd)
 {
 {
   cTValue *tv = NULL;
   cTValue *tv = NULL;
   if (J->base[0]) {
   if (J->base[0]) {
@@ -1338,13 +1375,20 @@ static void crec_arith_meta(jit_State *J, CTState *cts, RecordFFData *rd)
     if (tvisfunc(tv)) {
     if (tvisfunc(tv)) {
       J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
       J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
       rd->nres = -1;  /* Pending tailcall. */
       rd->nres = -1;  /* Pending tailcall. */
-      return;
+      return 0;
     }  /* NYI: non-function metamethods. */
     }  /* NYI: non-function metamethods. */
-  } else if ((MMS)rd->data == MM_eq) {
-    J->base[0] = TREF_FALSE;
-    return;
+  } else if ((MMS)rd->data == MM_eq) {  /* Fallback cdata pointer comparison. */
+    if (sp[0] && sp[1] && ctype_isnum(s[0]->info) == ctype_isnum(s[1]->info)) {
+      /* Assume true comparison. Fixup and emit pending guard later. */
+      lj_ir_set(J, IRTG(IR_EQ, IRT_PTR), sp[0], sp[1]);
+      J->postproc = LJ_POST_FIXGUARD;
+      return TREF_TRUE;
+    } else {
+      return TREF_FALSE;
+    }
   }
   }
   lj_trace_err(J, LJ_TRERR_BADTYPE);
   lj_trace_err(J, LJ_TRERR_BADTYPE);
+  return 0;
 }
 }
 
 
 void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
 void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
@@ -1357,7 +1401,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
     TRef tr = J->base[i];
     TRef tr = J->base[i];
     CType *ct = ctype_get(cts, CTID_DOUBLE);
     CType *ct = ctype_get(cts, CTID_DOUBLE);
     if (!tr) {
     if (!tr) {
-      goto trymeta;
+      lj_trace_err(J, LJ_TRERR_BADTYPE);
     } else if (tref_iscdata(tr)) {
     } else if (tref_iscdata(tr)) {
       CTypeID id = argv2cdata(J, tr, &rd->argv[i])->ctypeid;
       CTypeID id = argv2cdata(J, tr, &rd->argv[i])->ctypeid;
       IRType t;
       IRType t;
@@ -1387,11 +1431,12 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
       }
       }
       if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
       if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
       if (ctype_isnum(ct->info)) {
       if (ctype_isnum(ct->info)) {
-	if (t == IRT_CDATA) goto trymeta;
-	if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
-	tr = emitir(IRT(IR_XLOAD, t), tr, 0);
-      } else if (!(ctype_isptr(ct->info) || ctype_isrefarray(ct->info))) {
-	goto trymeta;
+	if (t == IRT_CDATA) {
+	  tr = 0;
+	} else {
+	  if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
+	  tr = emitir(IRT(IR_XLOAD, t), tr, 0);
+	}
       }
       }
     } else if (tref_isnil(tr)) {
     } else if (tref_isnil(tr)) {
       tr = lj_ir_kptr(J, NULL);
       tr = lj_ir_kptr(J, NULL);
@@ -1411,10 +1456,17 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
 	  emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, str));
 	  emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, str));
 	  ct = ctype_child(cts, cct);
 	  ct = ctype_child(cts, cct);
 	  tr = lj_ir_kint(J, (int32_t)ofs);
 	  tr = lj_ir_kint(J, (int32_t)ofs);
-	}  /* else: interpreter will throw. */
-      }  /* else: interpreter will throw. */
+	} else {  /* Interpreter will throw or return false. */
+	  ct = ctype_get(cts, CTID_P_VOID);
+	}
+      } else if (ctype_isptr(ct->info)) {
+	tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCstr)));
+      } else {
+	ct = ctype_get(cts, CTID_P_VOID);
+      }
     } else if (!tref_isnum(tr)) {
     } else if (!tref_isnum(tr)) {
-      goto trymeta;
+      tr = 0;
+      ct = ctype_get(cts, CTID_P_VOID);
     }
     }
   ok:
   ok:
     s[i] = ct;
     s[i] = ct;
@@ -1422,22 +1474,19 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
   }
   }
   {
   {
     TRef tr;
     TRef tr;
-    if ((tr = crec_arith_int64(J, sp, s, (MMS)rd->data)) ||
-	(tr = crec_arith_ptr(J, sp, s, (MMS)rd->data))) {
-      J->base[0] = tr;
-      /* Fixup cdata comparisons, too. Avoids some cdata escapes. */
-      if (J->postproc == LJ_POST_FIXGUARD && frame_iscont(J->L->base-1) &&
-	  !irt_isguard(J->guardemit)) {
-	const BCIns *pc = frame_contpc(J->L->base-1) - 1;
-	if (bc_op(*pc) <= BC_ISNEP) {
-	  setframe_pc(&J2G(J)->tmptv, pc);
-	  J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
-	  J->postproc = LJ_POST_FIXCOMP;
-	}
+    if (!(tr = crec_arith_int64(J, sp, s, (MMS)rd->data)) &&
+	!(tr = crec_arith_ptr(J, sp, s, (MMS)rd->data)) &&
+	!(tr = crec_arith_meta(J, sp, s, cts, rd)))
+      return;
+    J->base[0] = tr;
+    /* Fixup cdata comparisons, too. Avoids some cdata escapes. */
+    if (J->postproc == LJ_POST_FIXGUARD && frame_iscont(J->L->base-1) &&
+	!irt_isguard(J->guardemit)) {
+      const BCIns *pc = frame_contpc(J->L->base-1) - 1;
+      if (bc_op(*pc) <= BC_ISNEP) {
+	J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
+	J->postproc = LJ_POST_FIXCOMP;
       }
       }
-    } else {
-    trymeta:
-      crec_arith_meta(J, cts, rd);
     }
     }
   }
   }
 }
 }
@@ -1624,7 +1673,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
 void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
 void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
 {
 {
   argv2cdata(J, J->base[0], &rd->argv[0]);
   argv2cdata(J, J->base[0], &rd->argv[0]);
-  crec_finalizer(J, J->base[0], &rd->argv[1]);
+  if (!J->base[1])
+    lj_trace_err(J, LJ_TRERR_BADTYPE);
+  crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
+}
+
+/* -- 64 bit bit.* library functions -------------------------------------- */
+
+/* Determine bit operation type from argument type. */
+static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
+{
+  if (tviscdata(tv)) {
+    CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
+    if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
+    if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
+      return CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    return CTID_INT64;  /* Otherwise use int64_t. */
+  }
+  return 0;  /* Use regular 32 bit ops. */
+}
+
+void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+		       J->base[0], &rd->argv[0]);
+  if (!tref_isinteger(tr))
+    tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
+  J->base[0] = tr;
+}
+
+int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
+  if (id) {
+    TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+    return 1;
+  }
+  return 0;
+}
+
+int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = 0;
+  MSize i;
+  for (i = 0; J->base[i] != 0; i++) {
+    CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
+    if (id < aid) id = aid;  /* Determine highest type rank of all arguments. */
+  }
+  if (id) {
+    CType *ct = ctype_get(cts, id);
+    uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
+    TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
+    for (i = 1; J->base[i] != 0; i++) {
+      TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
+      tr = emitir(ot, tr, tr2);
+    }
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+    return 1;
+  }
+  return 0;
+}
+
+int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id;
+  TRef tsh = 0;
+  if (J->base[0] && tref_iscdata(J->base[1])) {
+    tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+		     J->base[1], &rd->argv[1]);
+    if (!tref_isinteger(tsh))
+      tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
+    J->base[1] = tsh;
+  }
+  id = crec_bit64_type(cts, &rd->argv[0]);
+  if (id) {
+    TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    uint32_t op = rd->data;
+    if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
+    if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+	!tref_isk(tsh))
+      tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
+#ifdef LJ_TARGET_UNIFYROT
+      if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+	op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+	tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+      }
+#endif
+    tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+    return 1;
+  }
+  return 0;
+}
+
+TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
+  TRef tr, trsf = J->base[1];
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  int32_t n;
+  if (trsf) {
+    CTypeID id2 = 0;
+    n = (int32_t)lj_carith_check64(J->L, 2, &id2);
+    if (id2)
+      trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
+    else
+      trsf = lj_opt_narrow_tobit(J, trsf);
+    emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n));  /* Specialize to n. */
+  } else {
+    n = id ? 16 : 8;
+  }
+  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+  if (id) {
+    tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    if (n < 16)
+      tr = emitir(IRT(IR_BAND, IRT_U64), tr,
+		  lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
+  } else {
+    tr = lj_opt_narrow_tobit(J, J->base[0]);
+    if (n < 8)
+      tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
+    tr = emitconv(tr, IRT_U64, IRT_INT, 0);  /* No sign-extension. */
+    lj_needsplit(J);
+  }
+  return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
 }
 }
 
 
 /* -- Miscellaneous library functions ------------------------------------- */
 /* -- Miscellaneous library functions ------------------------------------- */

+ 8 - 1
Source/ThirdParty/LuaJIT/src/lj_crecord.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Trace recorder for C data operations.
 ** Trace recorder for C data operations.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CRECORD_H
 #ifndef _LJ_CRECORD_H
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
+
+LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
+LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
+
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
 #endif
 #endif
 
 

+ 11 - 8
Source/ThirdParty/LuaJIT/src/lj_ctype.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** C type management.
 ** C type management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
@@ -11,6 +11,7 @@
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
+#include "lj_strfmt.h"
 #include "lj_ctype.h"
 #include "lj_ctype.h"
 #include "lj_ccallback.h"
 #include "lj_ccallback.h"
 
 
@@ -37,6 +38,8 @@
   _("uint64_t",			UINT64) \
   _("uint64_t",			UINT64) \
   _("intptr_t",			INT_PSZ) \
   _("intptr_t",			INT_PSZ) \
   _("uintptr_t",		UINT_PSZ) \
   _("uintptr_t",		UINT_PSZ) \
+  /* From POSIX. */ \
+  _("ssize_t",			INT_PSZ) \
   /* End of typedef list. */
   /* End of typedef list. */
 
 
 /* Keywords (only the ones we actually care for). */
 /* Keywords (only the ones we actually care for). */
@@ -568,19 +571,19 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
 /* Convert complex to string with 'i' or 'I' suffix. */
 /* Convert complex to string with 'i' or 'I' suffix. */
 GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
 GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
 {
 {
-  char buf[2*LJ_STR_NUMBUF+2+1];
+  char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf;
   TValue re, im;
   TValue re, im;
-  size_t len;
   if (size == 2*sizeof(double)) {
   if (size == 2*sizeof(double)) {
     re.n = *(double *)sp; im.n = ((double *)sp)[1];
     re.n = *(double *)sp; im.n = ((double *)sp)[1];
   } else {
   } else {
     re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
     re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
   }
   }
-  len = lj_str_bufnum(buf, &re);
-  if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+';
-  len += lj_str_bufnum(buf+len, &im);
-  buf[len] = buf[len-1] >= 'a' ? 'I' : 'i';
-  return lj_str_new(L, buf, len+1);
+  p = lj_strfmt_wnum(p, &re);
+  if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+';
+  p = lj_strfmt_wnum(p, &im);
+  *p = *(p-1) >= 'a' ? 'I' : 'i';
+  p++;
+  return lj_str_new(L, buf, p-buf);
 }
 }
 
 
 /* -- C type state -------------------------------------------------------- */
 /* -- C type state -------------------------------------------------------- */

+ 2 - 2
Source/ThirdParty/LuaJIT/src/lj_ctype.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** C type management.
 ** C type management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_CTYPE_H
 #ifndef _LJ_CTYPE_H
@@ -263,7 +263,7 @@ typedef struct CTState {
 /* -- Predefined types ---------------------------------------------------- */
 /* -- Predefined types ---------------------------------------------------- */
 
 
 /* Target-dependent types. */
 /* Target-dependent types. */
-#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#if LJ_TARGET_PPC
 #define CTTYDEFP(_) \
 #define CTTYDEFP(_) \
   _(LINT32,		4,	CT_NUM, CTF_LONG|CTALIGN(2))
   _(LINT32,		4,	CT_NUM, CTF_LONG|CTALIGN(2))
 #else
 #else

+ 142 - 46
Source/ThirdParty/LuaJIT/src/lj_debug.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Debugging and introspection.
 ** Debugging and introspection.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_debug_c
 #define lj_debug_c
@@ -9,11 +9,12 @@
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_err.h"
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_debug.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
 #include "lj_state.h"
 #include "lj_state.h"
 #include "lj_frame.h"
 #include "lj_frame.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
+#include "lj_strfmt.h"
 #if LJ_HASJIT
 #if LJ_HASJIT
 #include "lj_jit.h"
 #include "lj_jit.h"
 #endif
 #endif
@@ -23,11 +24,11 @@
 /* Get frame corresponding to a level. */
 /* Get frame corresponding to a level. */
 cTValue *lj_debug_frame(lua_State *L, int level, int *size)
 cTValue *lj_debug_frame(lua_State *L, int level, int *size)
 {
 {
-  cTValue *frame, *nextframe, *bot = tvref(L->stack);
+  cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
   /* Traverse frames backwards. */
   /* Traverse frames backwards. */
   for (nextframe = frame = L->base-1; frame > bot; ) {
   for (nextframe = frame = L->base-1; frame > bot; ) {
     if (frame_gc(frame) == obj2gco(L))
     if (frame_gc(frame) == obj2gco(L))
-      level++;  /* Skip dummy frames. See lj_meta_call(). */
+      level++;  /* Skip dummy frames. See lj_err_optype_call(). */
     if (level-- == 0) {
     if (level-- == 0) {
       *size = (int)(nextframe - frame);
       *size = (int)(nextframe - frame);
       return frame;  /* Level found. */
       return frame;  /* Level found. */
@@ -86,7 +87,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
 	if (frame_islua(f)) {
 	if (frame_islua(f)) {
 	  f = frame_prevl(f);
 	  f = frame_prevl(f);
 	} else {
 	} else {
-	  if (frame_isc(f))
+	  if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
 	    cf = cframe_raw(cframe_prev(cf));
 	    cf = cframe_raw(cframe_prev(cf));
 	  f = frame_prevd(f);
 	  f = frame_prevd(f);
 	}
 	}
@@ -140,38 +141,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
 
 
 /* -- Variable names ------------------------------------------------------ */
 /* -- Variable names ------------------------------------------------------ */
 
 
-/* Read ULEB128 value. */
-static uint32_t debug_read_uleb128(const uint8_t **pp)
-{
-  const uint8_t *p = *pp;
-  uint32_t v = *p++;
-  if (LJ_UNLIKELY(v >= 0x80)) {
-    int sh = 0;
-    v &= 0x7f;
-    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
-  }
-  *pp = p;
-  return v;
-}
-
 /* Get name of a local variable from slot number and PC. */
 /* Get name of a local variable from slot number and PC. */
 static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
 static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
 {
 {
-  const uint8_t *p = proto_varinfo(pt);
+  const char *p = (const char *)proto_varinfo(pt);
   if (p) {
   if (p) {
     BCPos lastpc = 0;
     BCPos lastpc = 0;
     for (;;) {
     for (;;) {
-      const char *name = (const char *)p;
-      uint32_t vn = *p++;
+      const char *name = p;
+      uint32_t vn = *(const uint8_t *)p;
       BCPos startpc, endpc;
       BCPos startpc, endpc;
       if (vn < VARNAME__MAX) {
       if (vn < VARNAME__MAX) {
 	if (vn == VARNAME_END) break;  /* End of varinfo. */
 	if (vn == VARNAME_END) break;  /* End of varinfo. */
       } else {
       } else {
-	while (*p++) ;  /* Skip over variable name string. */
+	do { p++; } while (*(const uint8_t *)p);  /* Skip over variable name. */
       }
       }
-      lastpc = startpc = lastpc + debug_read_uleb128(&p);
+      p++;
+      lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
       if (startpc > pc) break;
       if (startpc > pc) break;
-      endpc = startpc + debug_read_uleb128(&p);
+      endpc = startpc + lj_buf_ruleb128(&p);
       if (pc < endpc && slot-- == 0) {
       if (pc < endpc && slot-- == 0) {
 	if (vn < VARNAME__MAX) {
 	if (vn < VARNAME__MAX) {
 #define VARNAMESTR(name, str)	str "\0"
 #define VARNAMESTR(name, str)	str "\0"
@@ -196,7 +184,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
   TValue *nextframe = size ? frame + size : NULL;
   TValue *nextframe = size ? frame + size : NULL;
   GCfunc *fn = frame_func(frame);
   GCfunc *fn = frame_func(frame);
   BCPos pc = debug_framepc(L, fn, nextframe);
   BCPos pc = debug_framepc(L, fn, nextframe);
-  if (!nextframe) nextframe = L->top;
+  if (!nextframe) nextframe = L->top+LJ_FR2;
   if ((int)slot1 < 0) {  /* Negative slot number is for varargs. */
   if ((int)slot1 < 0) {  /* Negative slot number is for varargs. */
     if (pc != NO_BCPOS) {
     if (pc != NO_BCPOS) {
       GCproto *pt = funcproto(fn);
       GCproto *pt = funcproto(fn);
@@ -206,7 +194,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
 	  nextframe = frame;
 	  nextframe = frame;
 	  frame = frame_prevd(frame);
 	  frame = frame_prevd(frame);
 	}
 	}
-	if (frame + slot1 < nextframe) {
+	if (frame + slot1+LJ_FR2 < nextframe) {
 	  *name = "(*vararg)";
 	  *name = "(*vararg)";
 	  return frame+slot1;
 	  return frame+slot1;
 	}
 	}
@@ -217,7 +205,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
   if (pc != NO_BCPOS &&
   if (pc != NO_BCPOS &&
       (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
       (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
     ;
     ;
-  else if (slot1 > 0 && frame + slot1 < nextframe)
+  else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
     *name = "(*temporary)";
     *name = "(*temporary)";
   return frame+slot1;
   return frame+slot1;
 }
 }
@@ -280,7 +268,7 @@ restart:
 	*name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
 	*name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
 	if (ip > proto_bc(pt)) {
 	if (ip > proto_bc(pt)) {
 	  BCIns insp = ip[-1];
 	  BCIns insp = ip[-1];
-	  if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
+	  if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
 	      bc_d(insp) == bc_b(ins))
 	      bc_d(insp) == bc_b(ins))
 	    return "method";
 	    return "method";
 	}
 	}
@@ -297,12 +285,12 @@ restart:
 }
 }
 
 
 /* Deduce function name from caller of a frame. */
 /* Deduce function name from caller of a frame. */
-const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
+const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
 {
 {
-  TValue *pframe;
+  cTValue *pframe;
   GCfunc *fn;
   GCfunc *fn;
   BCPos pc;
   BCPos pc;
-  if (frame <= tvref(L->stack))
+  if (frame <= tvref(L->stack)+LJ_FR2)
     return NULL;
     return NULL;
   if (frame_isvarg(frame))
   if (frame_isvarg(frame))
     frame = frame_prevd(frame);
     frame = frame_prevd(frame);
@@ -328,7 +316,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
 /* -- Source code locations ----------------------------------------------- */
 /* -- Source code locations ----------------------------------------------- */
 
 
 /* Generate shortened source name. */
 /* Generate shortened source name. */
-void lj_debug_shortname(char *out, GCstr *str)
+void lj_debug_shortname(char *out, GCstr *str, BCLine line)
 {
 {
   const char *src = strdata(str);
   const char *src = strdata(str);
   if (*src == '=') {
   if (*src == '=') {
@@ -342,11 +330,11 @@ void lj_debug_shortname(char *out, GCstr *str)
       *out++ = '.'; *out++ = '.'; *out++ = '.';
       *out++ = '.'; *out++ = '.'; *out++ = '.';
     }
     }
     strcpy(out, src);
     strcpy(out, src);
-  } else {  /* Output [string "string"]. */
+  } else {  /* Output [string "string"] or [builtin:name]. */
     size_t len;  /* Length, up to first control char. */
     size_t len;  /* Length, up to first control char. */
     for (len = 0; len < LUA_IDSIZE-12; len++)
     for (len = 0; len < LUA_IDSIZE-12; len++)
       if (((const unsigned char *)src)[len] < ' ') break;
       if (((const unsigned char *)src)[len] < ' ') break;
-    strcpy(out, "[string \""); out += 9;
+    strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
     if (src[len] != '\0') {  /* Must truncate? */
     if (src[len] != '\0') {  /* Must truncate? */
       if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
       if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
       strncpy(out, src, len); out += len;
       strncpy(out, src, len); out += len;
@@ -354,7 +342,7 @@ void lj_debug_shortname(char *out, GCstr *str)
     } else {
     } else {
       strcpy(out, src); out += len;
       strcpy(out, src); out += len;
     }
     }
-    strcpy(out, "\"]");
+    strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
   }
   }
 }
 }
 
 
@@ -367,14 +355,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
     if (isluafunc(fn)) {
     if (isluafunc(fn)) {
       BCLine line = debug_frameline(L, fn, nextframe);
       BCLine line = debug_frameline(L, fn, nextframe);
       if (line >= 0) {
       if (line >= 0) {
+	GCproto *pt = funcproto(fn);
 	char buf[LUA_IDSIZE];
 	char buf[LUA_IDSIZE];
-	lj_debug_shortname(buf, proto_chunkname(funcproto(fn)));
-	lj_str_pushf(L, "%s:%d: %s", buf, line, msg);
+	lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
+	lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
 	return;
 	return;
       }
       }
     }
     }
   }
   }
-  lj_str_pushf(L, "%s", msg);
+  lj_strfmt_pushf(L, "%s", msg);
 }
 }
 
 
 /* Push location string for a bytecode position to Lua stack. */
 /* Push location string for a bytecode position to Lua stack. */
@@ -384,20 +373,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
   const char *s = strdata(name);
   const char *s = strdata(name);
   MSize i, len = name->len;
   MSize i, len = name->len;
   BCLine line = lj_debug_line(pt, pc);
   BCLine line = lj_debug_line(pt, pc);
-  if (*s == '@') {
+  if (pt->firstline == ~(BCLine)0) {
+    lj_strfmt_pushf(L, "builtin:%s", s);
+  } else if (*s == '@') {
     s++; len--;
     s++; len--;
     for (i = len; i > 0; i--)
     for (i = len; i > 0; i--)
       if (s[i] == '/' || s[i] == '\\') {
       if (s[i] == '/' || s[i] == '\\') {
 	s += i+1;
 	s += i+1;
 	break;
 	break;
       }
       }
-    lj_str_pushf(L, "%s:%d", s, line);
+    lj_strfmt_pushf(L, "%s:%d", s, line);
   } else if (len > 40) {
   } else if (len > 40) {
-    lj_str_pushf(L, "%p:%d", pt, line);
+    lj_strfmt_pushf(L, "%p:%d", pt, line);
   } else if (*s == '=') {
   } else if (*s == '=') {
-    lj_str_pushf(L, "%s:%d", s+1, line);
+    lj_strfmt_pushf(L, "%s:%d", s+1, line);
   } else {
   } else {
-    lj_str_pushf(L, "\"%s\":%d", s, line);
+    lj_strfmt_pushf(L, "\"%s\":%d", s, line);
   }
   }
 }
 }
 
 
@@ -460,10 +451,10 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
 	BCLine firstline = pt->firstline;
 	BCLine firstline = pt->firstline;
 	GCstr *name = proto_chunkname(pt);
 	GCstr *name = proto_chunkname(pt);
 	ar->source = strdata(name);
 	ar->source = strdata(name);
-	lj_debug_shortname(ar->short_src, name);
+	lj_debug_shortname(ar->short_src, name, pt->firstline);
 	ar->linedefined = (int)firstline;
 	ar->linedefined = (int)firstline;
 	ar->lastlinedefined = (int)(firstline + pt->numline);
 	ar->lastlinedefined = (int)(firstline + pt->numline);
-	ar->what = firstline ? "Lua" : "main";
+	ar->what = (firstline || !pt->numline) ? "Lua" : "main";
       } else {
       } else {
 	ar->source = "=[C]";
 	ar->source = "=[C]";
 	ar->short_src[0] = '[';
 	ar->short_src[0] = '[';
@@ -550,6 +541,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
   }
   }
 }
 }
 
 
+#if LJ_HASPROFILE
+/* Put the chunkname into a buffer. */
+static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
+{
+  GCstr *name = proto_chunkname(pt);
+  const char *p = strdata(name);
+  if (pt->firstline == ~(BCLine)0) {
+    lj_buf_putmem(sb, "[builtin:", 9);
+    lj_buf_putstr(sb, name);
+    lj_buf_putb(sb, ']');
+    return 0;
+  }
+  if (*p == '=' || *p == '@') {
+    MSize len = name->len-1;
+    p++;
+    if (pathstrip) {
+      int i;
+      for (i = len-1; i >= 0; i--)
+	if (p[i] == '/' || p[i] == '\\') {
+	  len -= i+1;
+	  p = p+i+1;
+	  break;
+	}
+    }
+    lj_buf_putmem(sb, p, len);
+  } else {
+    lj_buf_putmem(sb, "[string]", 8);
+  }
+  return 1;
+}
+
+/* Put a compact stack dump into a buffer. */
+void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
+{
+  int level = 0, dir = 1, pathstrip = 1;
+  MSize lastlen = 0;
+  if (depth < 0) { level = ~depth; depth = dir = -1; }  /* Reverse frames. */
+  while (level != depth) {  /* Loop through all frame. */
+    int size;
+    cTValue *frame = lj_debug_frame(L, level, &size);
+    if (frame) {
+      cTValue *nextframe = size ? frame+size : NULL;
+      GCfunc *fn = frame_func(frame);
+      const uint8_t *p = (const uint8_t *)fmt;
+      int c;
+      while ((c = *p++)) {
+	switch (c) {
+	case 'p':  /* Preserve full path. */
+	  pathstrip = 0;
+	  break;
+	case 'F': case 'f': {  /* Dump function name. */
+	  const char *name;
+	  const char *what = lj_debug_funcname(L, frame, &name);
+	  if (what) {
+	    if (c == 'F' && isluafunc(fn)) {  /* Dump module:name for 'F'. */
+	      GCproto *pt = funcproto(fn);
+	      if (pt->firstline != ~(BCLine)0) {  /* Not a bytecode builtin. */
+		debug_putchunkname(sb, pt, pathstrip);
+		lj_buf_putb(sb, ':');
+	      }
+	    }
+	    lj_buf_putmem(sb, name, (MSize)strlen(name));
+	    break;
+	  }  /* else: can't derive a name, dump module:line. */
+	  }
+	  /* fallthrough */
+	case 'l':  /* Dump module:line. */
+	  if (isluafunc(fn)) {
+	    GCproto *pt = funcproto(fn);
+	    if (debug_putchunkname(sb, pt, pathstrip)) {
+	      /* Regular Lua function. */
+	      BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+				       pt->firstline;
+	      lj_buf_putb(sb, ':');
+	      lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
+	    }
+	  } else if (isffunc(fn)) {  /* Dump numbered builtins. */
+	    lj_buf_putmem(sb, "[builtin#", 9);
+	    lj_strfmt_putint(sb, fn->c.ffid);
+	    lj_buf_putb(sb, ']');
+	  } else {  /* Dump C function address. */
+	    lj_buf_putb(sb, '@');
+	    lj_strfmt_putptr(sb, fn->c.f);
+	  }
+	  break;
+	case 'Z':  /* Zap trailing separator. */
+	  lastlen = sbuflen(sb);
+	  break;
+	default:
+	  lj_buf_putb(sb, c);
+	  break;
+	}
+      }
+    } else if (dir == 1) {
+      break;
+    } else {
+      level -= size;  /* Reverse frame order: quickly skip missing level. */
+    }
+    level += dir;
+  }
+  if (lastlen)
+    setsbufP(sb, sbufB(sb) + lastlen);  /* Zap trailing separator. */
+}
+#endif
+
 /* Number of frames for the leading and trailing part of a traceback. */
 /* Number of frames for the leading and trailing part of a traceback. */
 #define TRACEBACK_LEVELS1	12
 #define TRACEBACK_LEVELS1	12
 #define TRACEBACK_LEVELS2	10
 #define TRACEBACK_LEVELS2	10

+ 7 - 3
Source/ThirdParty/LuaJIT/src/lj_debug.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Debugging and introspection.
 ** Debugging and introspection.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_DEBUG_H
 #ifndef _LJ_DEBUG_H
@@ -32,14 +32,18 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
 LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
 LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
 LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
 LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
 				      BCReg slot, const char **name);
 				      BCReg slot, const char **name);
-LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
+LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
 				      const char **name);
 				      const char **name);
-LJ_FUNC void lj_debug_shortname(char *out, GCstr *str);
+LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
 LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
 LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
 			     cTValue *frame, cTValue *nextframe);
 			     cTValue *frame, cTValue *nextframe);
 LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
 LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
 LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
 LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
 			     int ext);
 			     int ext);
+#if LJ_HASPROFILE
+LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
+				int depth);
+#endif
 
 
 /* Fixed internal variable names. */
 /* Fixed internal variable names. */
 #define VARNAMEDEF(_) \
 #define VARNAMEDEF(_) \

+ 24 - 8
Source/ThirdParty/LuaJIT/src/lj_def.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** LuaJIT common internal definitions.
 ** LuaJIT common internal definitions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_DEF_H
 #ifndef _LJ_DEF_H
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
 #include <stdlib.h>
 #include <stdlib.h>
 
 
 /* Various VM limits. */
 /* Various VM limits. */
-#define LJ_MAX_MEM	0x7fffff00	/* Max. total memory allocation. */
+#define LJ_MAX_MEM32	0x7fffff00	/* Max. 32 bit memory allocation. */
+#define LJ_MAX_MEM64	((uint64_t)1<<47)  /* Max. 64 bit memory allocation. */
+/* Max. total memory allocation. */
+#define LJ_MAX_MEM	(LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
 #define LJ_MAX_ALLOC	LJ_MAX_MEM	/* Max. individual allocation length. */
 #define LJ_MAX_ALLOC	LJ_MAX_MEM	/* Max. individual allocation length. */
-#define LJ_MAX_STR	LJ_MAX_MEM	/* Max. string length. */
-#define LJ_MAX_UDATA	LJ_MAX_MEM	/* Max. userdata length. */
+#define LJ_MAX_STR	LJ_MAX_MEM32	/* Max. string length. */
+#define LJ_MAX_BUF	LJ_MAX_MEM32	/* Max. buffer length. */
+#define LJ_MAX_UDATA	LJ_MAX_MEM32	/* Max. userdata length. */
 
 
 #define LJ_MAX_STRTAB	(1<<26)		/* Max. string table size. */
 #define LJ_MAX_STRTAB	(1<<26)		/* Max. string table size. */
 #define LJ_MAX_HBITS	26		/* Max. hash bits. */
 #define LJ_MAX_HBITS	26		/* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_ASIZE	((1<<(LJ_MAX_ABITS-1))+1)  /* Max. array part size. */
 #define LJ_MAX_ASIZE	((1<<(LJ_MAX_ABITS-1))+1)  /* Max. array part size. */
 #define LJ_MAX_COLOSIZE	16		/* Max. elems for colocated array. */
 #define LJ_MAX_COLOSIZE	16		/* Max. elems for colocated array. */
 
 
-#define LJ_MAX_LINE	LJ_MAX_MEM	/* Max. source code line number. */
+#define LJ_MAX_LINE	LJ_MAX_MEM32	/* Max. source code line number. */
 #define LJ_MAX_XLEVEL	200		/* Max. syntactic nesting level. */
 #define LJ_MAX_XLEVEL	200		/* Max. syntactic nesting level. */
 #define LJ_MAX_BCINS	(1<<26)		/* Max. # of bytecode instructions. */
 #define LJ_MAX_BCINS	(1<<26)		/* Max. # of bytecode instructions. */
 #define LJ_MAX_SLOTS	250		/* Max. # of slots in a Lua func. */
 #define LJ_MAX_SLOTS	250		/* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_UPVAL	60		/* Max. # of upvalues. */
 #define LJ_MAX_UPVAL	60		/* Max. # of upvalues. */
 
 
 #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
 #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
-#define LJ_STACK_EXTRA	5		/* Extra stack space (metamethods). */
+#define LJ_STACK_EXTRA	(5+2*LJ_FR2)	/* Extra stack space (metamethods). */
 
 
 #define LJ_NUM_CBPAGE	1		/* Number of FFI callback pages. */
 #define LJ_NUM_CBPAGE	1		/* Number of FFI callback pages. */
 
 
@@ -99,6 +103,14 @@ typedef unsigned int uintptr_t;
 #define checki32(x)	((x) == (int32_t)(x))
 #define checki32(x)	((x) == (int32_t)(x))
 #define checku32(x)	((x) == (uint32_t)(x))
 #define checku32(x)	((x) == (uint32_t)(x))
 #define checkptr32(x)	((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
 #define checkptr32(x)	((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
+#define checkptr47(x)	(((uint64_t)(x) >> 47) == 0)
+#if LJ_GC64
+#define checkptrGC(x)	(checkptr47((x)))
+#elif LJ_64
+#define checkptrGC(x)	(checkptr32((x)))
+#else
+#define checkptrGC(x)	1
+#endif
 
 
 /* Every half-decent C compiler transforms this into a rotate instruction. */
 /* Every half-decent C compiler transforms this into a rotate instruction. */
 #define lj_rol(x, n)	(((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
 #define lj_rol(x, n)	(((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
@@ -111,7 +123,7 @@ typedef uintptr_t BloomFilter;
 #define bloomset(b, x)	((b) |= bloombit((x)))
 #define bloomset(b, x)	((b) |= bloombit((x)))
 #define bloomtest(b, x)	((b) & bloombit((x)))
 #define bloomtest(b, x)	((b) & bloombit((x)))
 
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__psp2__)
 
 
 #define LJ_NORET	__attribute__((noreturn))
 #define LJ_NORET	__attribute__((noreturn))
 #define LJ_ALIGN(n)	__attribute__((aligned(n)))
 #define LJ_ALIGN(n)	__attribute__((aligned(n)))
@@ -119,7 +131,7 @@ typedef uintptr_t BloomFilter;
 #define LJ_AINLINE	inline __attribute__((always_inline))
 #define LJ_AINLINE	inline __attribute__((always_inline))
 #define LJ_NOINLINE	__attribute__((noinline))
 #define LJ_NOINLINE	__attribute__((noinline))
 
 
-#if defined(__ELF__) || defined(__MACH__)
+#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__)
 #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
 #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
 #define LJ_NOAPI	extern __attribute__((visibility("hidden")))
 #define LJ_NOAPI	extern __attribute__((visibility("hidden")))
 #endif
 #endif
@@ -150,6 +162,9 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
 #if defined(__arm__)
 #if defined(__arm__)
 static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
 static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
 {
 {
+#if defined(__psp2__)
+  return __builtin_rev(x);
+#else
   uint32_t r;
   uint32_t r;
 #if __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6T2__ || __ARM_ARCH_6Z__ ||\
 #if __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6T2__ || __ARM_ARCH_6Z__ ||\
     __ARM_ARCH_6ZK__ || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__
     __ARM_ARCH_6ZK__ || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__
@@ -163,6 +178,7 @@ static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
 #endif
 #endif
   return ((r & 0xff00ffffu) >> 8) ^ lj_ror(x, 8);
   return ((r & 0xff00ffffu) >> 8) ^ lj_ror(x, 8);
 #endif
 #endif
+#endif
 }
 }
 
 
 static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)
 static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)

+ 73 - 10
Source/ThirdParty/LuaJIT/src/lj_dispatch.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Instruction dispatch handling.
 ** Instruction dispatch handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_dispatch_c
 #define lj_dispatch_c
@@ -8,6 +8,7 @@
 
 
 #include "lj_obj.h"
 #include "lj_obj.h"
 #include "lj_err.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_func.h"
 #include "lj_func.h"
 #include "lj_str.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_tab.h"
@@ -17,6 +18,7 @@
 #include "lj_frame.h"
 #include "lj_frame.h"
 #include "lj_bc.h"
 #include "lj_bc.h"
 #include "lj_ff.h"
 #include "lj_ff.h"
+#include "lj_strfmt.h"
 #if LJ_HASJIT
 #if LJ_HASJIT
 #include "lj_jit.h"
 #include "lj_jit.h"
 #endif
 #endif
@@ -25,6 +27,9 @@
 #endif
 #endif
 #include "lj_trace.h"
 #include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
+#if LJ_HASPROFILE
+#include "lj_profile.h"
+#endif
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "luajit.h"
 #include "luajit.h"
 
 
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
 #include <math.h>
 #include <math.h>
 LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
 LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
 							  lua_State *co);
 							  lua_State *co);
+#if !LJ_HASJIT
+#define lj_dispatch_stitch	lj_dispatch_ins
+#endif
+#if !LJ_HASPROFILE
+#define lj_dispatch_profile	lj_dispatch_ins
+#endif
 
 
 #define GOTFUNC(name)	(ASMFunction)name,
 #define GOTFUNC(name)	(ASMFunction)name,
 static const ASMFunction dispatch_got[] = {
 static const ASMFunction dispatch_got[] = {
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g)
 #endif
 #endif
 
 
 /* Internal dispatch mode bits. */
 /* Internal dispatch mode bits. */
-#define DISPMODE_JIT	0x01	/* JIT compiler on. */
-#define DISPMODE_REC	0x02	/* Recording active. */
+#define DISPMODE_CALL	0x01	/* Override call dispatch. */
+#define DISPMODE_RET	0x02	/* Override return dispatch. */
 #define DISPMODE_INS	0x04	/* Override instruction dispatch. */
 #define DISPMODE_INS	0x04	/* Override instruction dispatch. */
-#define DISPMODE_CALL	0x08	/* Override call dispatch. */
-#define DISPMODE_RET	0x10	/* Override return dispatch. */
+#define DISPMODE_JIT	0x10	/* JIT compiler on. */
+#define DISPMODE_REC	0x20	/* Recording active. */
+#define DISPMODE_PROF	0x40	/* Profiling active. */
 
 
 /* Update dispatch table depending on various flags. */
 /* Update dispatch table depending on various flags. */
 void lj_dispatch_update(global_State *g)
 void lj_dispatch_update(global_State *g)
@@ -97,6 +109,9 @@ void lj_dispatch_update(global_State *g)
   mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0;
   mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0;
   mode |= G2J(g)->state != LJ_TRACE_IDLE ?
   mode |= G2J(g)->state != LJ_TRACE_IDLE ?
 	    (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
 	    (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
+#endif
+#if LJ_HASPROFILE
+  mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
 #endif
 #endif
   mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
   mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
   mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
   mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g)
     disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
     disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
 
 
     /* Set dynamic instruction dispatch. */
     /* Set dynamic instruction dispatch. */
-    if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) {
+    if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
       /* Need to update the whole table. */
       /* Need to update the whole table. */
-      if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {  /* No ins dispatch? */
+      if (!(mode & DISPMODE_INS)) {  /* No ins dispatch? */
 	/* Copy static dispatch table to dynamic dispatch table. */
 	/* Copy static dispatch table to dynamic dispatch table. */
 	memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
 	memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
 	/* Overwrite with dynamic return dispatch. */
 	/* Overwrite with dynamic return dispatch. */
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g)
 	}
 	}
       } else {
       } else {
 	/* The recording dispatch also checks for hooks. */
 	/* The recording dispatch also checks for hooks. */
-	ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
+	ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
+			(mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
 	uint32_t i;
 	uint32_t i;
 	for (i = 0; i < GG_LEN_SDISP; i++)
 	for (i = 0; i < GG_LEN_SDISP; i++)
 	  disp[i] = f;
 	  disp[i] = f;
       }
       }
-    } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {
+    } else if (!(mode & DISPMODE_INS)) {
       /* Otherwise set dynamic counting ins. */
       /* Otherwise set dynamic counting ins. */
       disp[BC_FORL] = f_forl;
       disp[BC_FORL] = f_forl;
       disp[BC_ITERL] = f_iterl;
       disp[BC_ITERL] = f_iterl;
@@ -352,10 +368,19 @@ static void callhook(lua_State *L, int event, BCLine line)
     /* Top frame, nextframe = NULL. */
     /* Top frame, nextframe = NULL. */
     ar.i_ci = (int)((L->base-1) - tvref(L->stack));
     ar.i_ci = (int)((L->base-1) - tvref(L->stack));
     lj_state_checkstack(L, 1+LUA_MINSTACK);
     lj_state_checkstack(L, 1+LUA_MINSTACK);
+#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+    lj_profile_hook_enter(g);
+#else
     hook_enter(g);
     hook_enter(g);
+#endif
     hookf(L, &ar);
     hookf(L, &ar);
     lua_assert(hook_active(g));
     lua_assert(hook_active(g));
+    setgcref(g->cur_L, obj2gco(L));
+#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+    lj_profile_hook_leave(g);
+#else
     hook_leave(g);
     hook_leave(g);
+#endif
   }
   }
 }
 }
 
 
@@ -368,7 +393,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
   if (bc_op(ins) == BC_UCLO)
   if (bc_op(ins) == BC_UCLO)
     ins = pc[bc_j(ins)];
     ins = pc[bc_j(ins)];
   switch (bc_op(ins)) {
   switch (bc_op(ins)) {
-  case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1;
+  case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
   case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
   case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
   case BC_TSETM: return bc_a(ins) + nres-1;
   case BC_TSETM: return bc_a(ins) + nres-1;
   default: return pt->framesize;
   default: return pt->framesize;
@@ -492,3 +517,41 @@ out:
   return makeasmfunc(lj_bc_ofs[op]);  /* Return static dispatch target. */
   return makeasmfunc(lj_bc_ofs[op]);  /* Return static dispatch target. */
 }
 }
 
 
+#if LJ_HASJIT
+/* Stitch a new trace. */
+void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
+{
+  ERRNO_SAVE
+  lua_State *L = J->L;
+  void *cf = cframe_raw(L->cframe);
+  const BCIns *oldpc = cframe_pc(cf);
+  setcframe_pc(cf, pc);
+  /* Before dispatch, have to bias PC by 1. */
+  L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
+  lj_trace_stitch(J, pc-1);  /* Point to the CALL instruction. */
+  setcframe_pc(cf, oldpc);
+  ERRNO_RESTORE
+}
+#endif
+
+#if LJ_HASPROFILE
+/* Profile dispatch. */
+void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
+{
+  ERRNO_SAVE
+  GCfunc *fn = curr_func(L);
+  GCproto *pt = funcproto(fn);
+  void *cf = cframe_raw(L->cframe);
+  const BCIns *oldpc = cframe_pc(cf);
+  global_State *g;
+  setcframe_pc(cf, pc);
+  L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
+  lj_profile_interpreter(L);
+  setcframe_pc(cf, oldpc);
+  g = G(L);
+  setgcref(g->cur_L, obj2gco(L));
+  setvmstate(g, INTERP);
+  ERRNO_RESTORE
+}
+#endif
+

+ 32 - 9
Source/ThirdParty/LuaJIT/src/lj_dispatch.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Instruction dispatch handling.
 ** Instruction dispatch handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_DISPATCH_H
 #ifndef _LJ_DISPATCH_H
@@ -14,6 +14,21 @@
 
 
 #if LJ_TARGET_MIPS
 #if LJ_TARGET_MIPS
 /* Need our own global offset table for the dreaded MIPS calling conventions. */
 /* Need our own global offset table for the dreaded MIPS calling conventions. */
+#if LJ_SOFTFP
+extern double __adddf3(double a, double b);
+extern double __subdf3(double a, double b);
+extern double __muldf3(double a, double b);
+extern double __divdf3(double a, double b);
+extern void __ledf2(double a, double b);
+extern double __floatsidf(int32_t a);
+extern int32_t __fixdfsi(double a);
+
+#define SFGOTDEF(_) \
+  _(lj_num2bit) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) _(__ledf2) \
+  _(__floatsidf) _(__fixdfsi)
+#else
+#define SFGOTDEF(_)
+#endif
 #if LJ_HASJIT
 #if LJ_HASJIT
 #define JITGOTDEF(_)	_(lj_trace_exit) _(lj_trace_hot)
 #define JITGOTDEF(_)	_(lj_trace_exit) _(lj_trace_hot)
 #else
 #else
@@ -29,15 +44,18 @@
   _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
   _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) \
   _(pow) _(fmod) _(ldexp) \
-  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \
+  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
+  _(lj_dispatch_profile) _(lj_err_throw) \
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
-  _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \
-  _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \
-  _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \
-  _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
-  JITGOTDEF(_) FFIGOTDEF(_)
+  _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
+  _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \
+  _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
+  _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
+  _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
+  _(lj_buf_putstr_upper) _(lj_buf_tostr) \
+  JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
 
 
 enum {
 enum {
 #define GOTENUM(name) LJ_GOT_##name,
 #define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +78,7 @@ typedef uint16_t HotCount;
 #define HOTCOUNT_CALL		1
 #define HOTCOUNT_CALL		1
 
 
 /* This solves a circular dependency problem -- bump as needed. Sigh. */
 /* This solves a circular dependency problem -- bump as needed. Sigh. */
-#define GG_NUM_ASMFF	62
+#define GG_NUM_ASMFF	57
 
 
 #define GG_LEN_DDISP	(BC__MAX + GG_NUM_ASMFF)
 #define GG_LEN_DDISP	(BC__MAX + GG_NUM_ASMFF)
 #define GG_LEN_SDISP	BC_FUNCF
 #define GG_LEN_SDISP	BC_FUNCF
@@ -109,7 +127,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
 /* Instruction dispatch callback for hooks or when recording. */
 /* Instruction dispatch callback for hooks or when recording. */
 LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
 LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
 LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
-LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc);
+#if LJ_HASJIT
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
+#endif
+#if LJ_HASPROFILE
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
+#endif
 
 
 #if LJ_HASFFI && !defined(_BUILDVM_H)
 #if LJ_HASFFI && !defined(_BUILDVM_H)
 /* Save/restore errno and GetLastError() around hooks, exits and recording. */
 /* Save/restore errno and GetLastError() around hooks, exits and recording. */

+ 9 - 9
Source/ThirdParty/LuaJIT/src/lj_emit_arm.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** ARM instruction emitter.
 ** ARM instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Constant encoding --------------------------------------------------- */
 /* -- Constant encoding --------------------------------------------------- */
@@ -308,30 +308,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
   emit_dm(as, ARMI_MOV, dst, src);
   emit_dm(as, ARMI_MOV, dst, src);
 }
 }
 
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
 #if LJ_SOFTFP
 #if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
 #else
 #else
   if (r >= RID_MAX_GPR)
   if (r >= RID_MAX_GPR)
-    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs);
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
   else
   else
 #endif
 #endif
-    emit_lso(as, ARMI_LDR, r, RID_SP, ofs);
+    emit_lso(as, ARMI_LDR, r, base, ofs);
 }
 }
 
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
 #if LJ_SOFTFP
 #if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
 #else
 #else
   if (r >= RID_MAX_GPR)
   if (r >= RID_MAX_GPR)
-    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs);
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
   else
   else
 #endif
 #endif
-    emit_lso(as, ARMI_STR, r, RID_SP, ofs);
+    emit_lso(as, ARMI_STR, r, base, ofs);
 }
 }
 
 
 /* Emit an arithmetic/logic operation with a constant operand. */
 /* Emit an arithmetic/logic operation with a constant operand. */

+ 9 - 9
Source/ThirdParty/LuaJIT/src/lj_emit_mips.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** MIPS instruction emitter.
 ** MIPS instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Emit basic instructions --------------------------------------------- */
 /* -- Emit basic instructions --------------------------------------------- */
@@ -178,24 +178,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
     emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
     emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
 }
 }
 
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
   if (r < RID_MAX_GPR)
   if (r < RID_MAX_GPR)
-    emit_tsi(as, MIPSI_LW, r, RID_SP, ofs);
+    emit_tsi(as, MIPSI_LW, r, base, ofs);
   else
   else
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
-	     (r & 31), RID_SP, ofs);
+	     (r & 31), base, ofs);
 }
 }
 
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
   if (r < RID_MAX_GPR)
   if (r < RID_MAX_GPR)
-    emit_tsi(as, MIPSI_SW, r, RID_SP, ofs);
+    emit_tsi(as, MIPSI_SW, r, base, ofs);
   else
   else
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
-	     (r&31), RID_SP, ofs);
+	     (r&31), base, ofs);
 }
 }
 
 
 /* Add offset to pointer. */
 /* Add offset to pointer. */

+ 9 - 9
Source/ThirdParty/LuaJIT/src/lj_emit_ppc.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** PPC instruction emitter.
 ** PPC instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Emit basic instructions --------------------------------------------- */
 /* -- Emit basic instructions --------------------------------------------- */
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
     emit_fb(as, PPCI_FMR, dst, src);
     emit_fb(as, PPCI_FMR, dst, src);
 }
 }
 
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
   if (r < RID_MAX_GPR)
   if (r < RID_MAX_GPR)
-    emit_tai(as, PPCI_LWZ, r, RID_SP, ofs);
+    emit_tai(as, PPCI_LWZ, r, base, ofs);
   else
   else
-    emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs);
+    emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
 }
 }
 
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
   if (r < RID_MAX_GPR)
   if (r < RID_MAX_GPR)
-    emit_tai(as, PPCI_STW, r, RID_SP, ofs);
+    emit_tai(as, PPCI_STW, r, base, ofs);
   else
   else
-    emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs);
+    emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
 }
 }
 
 
 /* Emit a compare (for equality) with a constant operand. */
 /* Emit a compare (for equality) with a constant operand. */

+ 11 - 15
Source/ThirdParty/LuaJIT/src/lj_emit_x86.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** x86/x64 instruction emitter.
 ** x86/x64 instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* -- Emit basic instructions --------------------------------------------- */
 /* -- Emit basic instructions --------------------------------------------- */
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
 
 
 /* -- Emit loads/stores --------------------------------------------------- */
 /* -- Emit loads/stores --------------------------------------------------- */
 
 
-/* Instruction selection for XMM moves. */
-#define XMM_MOVRR(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
-#define XMM_MOVRM(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
-
 /* mov [base+ofs], i */
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
 {
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
   if (tvispzero(tv))  /* Use xor only for +0. */
   if (tvispzero(tv))  /* Use xor only for +0. */
     emit_rr(as, XO_XORPS, r, r);
     emit_rr(as, XO_XORPS, r, r);
   else
   else
-    emit_rma(as, XMM_MOVRM(as), r, &tv->n);
+    emit_rma(as, XO_MOVSD, r, &tv->n);
 }
 }
 
 
 /* -- Emit control-flow instructions -------------------------------------- */
 /* -- Emit control-flow instructions -------------------------------------- */
@@ -427,25 +423,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
   if (dst < RID_MAX_GPR)
   if (dst < RID_MAX_GPR)
     emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
     emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
   else
   else
-    emit_rr(as, XMM_MOVRR(as), dst, src);
+    emit_rr(as, XO_MOVAPS, dst, src);
 }
 }
 
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
   if (r < RID_MAX_GPR)
   if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
+    emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
   else
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
 }
 }
 
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 {
   if (r < RID_MAX_GPR)
   if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs);
+    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
   else
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
 }
 }
 
 
 /* Add offset to pointer. */
 /* Add offset to pointer. */

+ 124 - 77
Source/ThirdParty/LuaJIT/src/lj_err.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Error handling.
 ** Error handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 // Modified by Lasse Oorni for Urho3D
 // Modified by Lasse Oorni for Urho3D
@@ -18,6 +18,7 @@
 #include "lj_ff.h"
 #include "lj_ff.h"
 #include "lj_trace.h"
 #include "lj_trace.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
+#include "lj_strfmt.h"
 
 
 /*
 /*
 ** LuaJIT can either use internal or external frame unwinding:
 ** LuaJIT can either use internal or external frame unwinding:
@@ -59,10 +60,10 @@
 ** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
 ** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
 ** EXT is mandatory on WIN64 since the calling convention has an abundance
 ** EXT is mandatory on WIN64 since the calling convention has an abundance
 ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
 ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** EXT is mandatory on POSIX/x64 since the interpreter doesn't save r12/r13.
+** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
 */
 */
 
 
-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL))
+#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
 #define LJ_UNWIND_EXT	1
 #define LJ_UNWIND_EXT	1
 #elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
 #elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
 #define LJ_UNWIND_EXT	1
 #define LJ_UNWIND_EXT	1
@@ -100,14 +101,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       TValue *top = restorestack(L, -nres);
       TValue *top = restorestack(L, -nres);
       if (frame < top) {  /* Frame reached? */
       if (frame < top) {  /* Frame reached? */
 	if (errcode) {
 	if (errcode) {
-	  L->cframe = cframe_prev(cf);
 	  L->base = frame+1;
 	  L->base = frame+1;
+	  L->cframe = cframe_prev(cf);
 	  unwindstack(L, top);
 	  unwindstack(L, top);
 	}
 	}
 	return cf;
 	return cf;
       }
       }
     }
     }
-    if (frame <= tvref(L->stack))
+    if (frame <= tvref(L->stack)+LJ_FR2)
       break;
       break;
     switch (frame_typep(frame)) {
     switch (frame_typep(frame)) {
     case FRAME_LUA:  /* Lua frame. */
     case FRAME_LUA:  /* Lua frame. */
@@ -115,14 +116,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       frame = frame_prevl(frame);
       frame = frame_prevl(frame);
       break;
       break;
     case FRAME_C:  /* C frame. */
     case FRAME_C:  /* C frame. */
-#if LJ_HASFFI
     unwind_c:
     unwind_c:
-#endif
 #if LJ_UNWIND_EXT
 #if LJ_UNWIND_EXT
       if (errcode) {
       if (errcode) {
-	L->cframe = cframe_prev(cf);
 	L->base = frame_prevd(frame) + 1;
 	L->base = frame_prevd(frame) + 1;
-	unwindstack(L, frame);
+	L->cframe = cframe_prev(cf);
+	unwindstack(L, frame - LJ_FR2);
       } else if (cf != stopcf) {
       } else if (cf != stopcf) {
 	cf = cframe_prev(cf);
 	cf = cframe_prev(cf);
 	frame = frame_prevd(frame);
 	frame = frame_prevd(frame);
@@ -145,16 +144,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	return cf;
 	return cf;
       }
       }
       if (errcode) {
       if (errcode) {
-	L->cframe = cframe_prev(cf);
 	L->base = frame_prevd(frame) + 1;
 	L->base = frame_prevd(frame) + 1;
-	unwindstack(L, frame);
+	L->cframe = cframe_prev(cf);
+	unwindstack(L, frame - LJ_FR2);
       }
       }
       return cf;
       return cf;
     case FRAME_CONT:  /* Continuation frame. */
     case FRAME_CONT:  /* Continuation frame. */
-#if LJ_HASFFI
-      if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+      if (frame_iscont_fficb(frame))
 	goto unwind_c;
 	goto unwind_c;
-#endif
     case FRAME_VARG:  /* Vararg frame. */
     case FRAME_VARG:  /* Vararg frame. */
       frame = frame_prevd(frame);
       frame = frame_prevd(frame);
       break;
       break;
@@ -167,8 +164,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	}
 	}
 	if (frame_typep(frame) == FRAME_PCALL)
 	if (frame_typep(frame) == FRAME_PCALL)
 	  hook_leave(G(L));
 	  hook_leave(G(L));
-	L->cframe = cf;
 	L->base = frame_prevd(frame) + 1;
 	L->base = frame_prevd(frame) + 1;
+	L->cframe = cf;
 	unwindstack(L, L->base);
 	unwindstack(L, L->base);
       }
       }
       return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
       return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -176,8 +173,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
   }
   }
   /* No C frame. */
   /* No C frame. */
   if (errcode) {
   if (errcode) {
+    L->base = tvref(L->stack)+1+LJ_FR2;
     L->cframe = NULL;
     L->cframe = NULL;
-    L->base = tvref(L->stack)+1;
     unwindstack(L, L->base);
     unwindstack(L, L->base);
     if (G(L)->panic)
     if (G(L)->panic)
       G(L)->panic(L);
       G(L)->panic(L);
@@ -188,20 +185,13 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 
 
 /* -- External frame unwinding -------------------------------------------- */
 /* -- External frame unwinding -------------------------------------------- */
 
 
-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_TARGET_WINDOWS
+#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
 
 
 /*
 /*
 ** We have to use our own definitions instead of the mandatory (!) unwind.h,
 ** We have to use our own definitions instead of the mandatory (!) unwind.h,
 ** since various OS, distros and compilers mess up the header installation.
 ** since various OS, distros and compilers mess up the header installation.
 */
 */
 
 
-typedef struct _Unwind_Exception
-{
-  uint64_t exclass;
-  void (*excleanup)(int, struct _Unwind_Exception *);
-  uintptr_t p1, p2;
-} __attribute__((__aligned__)) _Unwind_Exception;
-
 typedef struct _Unwind_Context _Unwind_Context;
 typedef struct _Unwind_Context _Unwind_Context;
 
 
 #define _URC_OK			0
 #define _URC_OK			0
@@ -211,8 +201,20 @@ typedef struct _Unwind_Context _Unwind_Context;
 #define _URC_CONTINUE_UNWIND	8
 #define _URC_CONTINUE_UNWIND	8
 #define _URC_FAILURE		9
 #define _URC_FAILURE		9
 
 
+#define LJ_UEXCLASS		0x4c55414a49543200ULL	/* LUAJIT2\0 */
+#define LJ_UEXCLASS_MAKE(c)	(LJ_UEXCLASS | (uint64_t)(c))
+#define LJ_UEXCLASS_CHECK(cl)	(((cl) ^ LJ_UEXCLASS) <= 0xff)
+#define LJ_UEXCLASS_ERRCODE(cl)	((int)((cl) & 0xff))
+
 #if !LJ_TARGET_ARM
 #if !LJ_TARGET_ARM
 
 
+typedef struct _Unwind_Exception
+{
+  uint64_t exclass;
+  void (*excleanup)(int, struct _Unwind_Exception *);
+  uintptr_t p1, p2;
+} __attribute__((__aligned__)) _Unwind_Exception;
+
 extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
 extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
 extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
 extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
 extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
 extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
@@ -224,11 +226,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *);
 #define _UA_HANDLER_FRAME	4
 #define _UA_HANDLER_FRAME	4
 #define _UA_FORCE_UNWIND	8
 #define _UA_FORCE_UNWIND	8
 
 
-#define LJ_UEXCLASS		0x4c55414a49543200ULL	/* LUAJIT2\0 */
-#define LJ_UEXCLASS_MAKE(c)	(LJ_UEXCLASS | (uint64_t)(c))
-#define LJ_UEXCLASS_CHECK(cl)	(((cl) ^ LJ_UEXCLASS) <= 0xff)
-#define LJ_UEXCLASS_ERRCODE(cl)	((int)((cl) & 0xff))
-
 /* DWARF2 personality handler referenced from interpreter .eh_frame. */
 /* DWARF2 personality handler referenced from interpreter .eh_frame. */
 LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
 LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
   uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
   uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
@@ -307,10 +304,23 @@ static void err_raise_ext(int errcode)
 }
 }
 #endif
 #endif
 
 
-#else
+#else /* LJ_TARGET_ARM */
 
 
-extern void _Unwind_DeleteException(void *);
-extern int __gnu_unwind_frame (void *, _Unwind_Context *);
+#define _US_VIRTUAL_UNWIND_FRAME	0
+#define _US_UNWIND_FRAME_STARTING	1
+#define _US_ACTION_MASK			3
+#define _US_FORCE_UNWIND		8
+
+typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+typedef struct _Unwind_Context _Unwind_Context;
+
+struct _Unwind_Control_Block {
+  uint64_t exclass;
+  uint32_t misc[20];
+};
+
+extern int _Unwind_RaiseException(_Unwind_Control_Block *);
+extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *);
 extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
 extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
 extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
 extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
 
 
@@ -326,35 +336,58 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v)
   _Unwind_VRS_Set(ctx, 0, r, 0, &v);
   _Unwind_VRS_Set(ctx, 0, r, 0, &v);
 }
 }
 
 
-#define _US_VIRTUAL_UNWIND_FRAME	0
-#define _US_UNWIND_FRAME_STARTING	1
-#define _US_ACTION_MASK			3
-#define _US_FORCE_UNWIND		8
+extern void lj_vm_unwind_ext(void);
 
 
 /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
 /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
-LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx)
+LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
+			       _Unwind_Context *ctx)
 {
 {
   void *cf = (void *)_Unwind_GetGR(ctx, 13);
   void *cf = (void *)_Unwind_GetGR(ctx, 13);
   lua_State *L = cframe_L(cf);
   lua_State *L = cframe_L(cf);
-  if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) {
-    setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+  int errcode;
+
+  switch ((state & _US_ACTION_MASK)) {
+  case _US_VIRTUAL_UNWIND_FRAME:
+    if ((state & _US_FORCE_UNWIND)) break;
     return _URC_HANDLER_FOUND;
     return _URC_HANDLER_FOUND;
-  }
-  if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) {
-    _Unwind_DeleteException(ucb);
-    _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw);
-    _Unwind_SetGR(ctx, 0, (uint32_t)L);
-    _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN);
+  case _US_UNWIND_FRAME_STARTING:
+    if (LJ_UEXCLASS_CHECK(ucb->exclass)) {
+      errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass);
+    } else {
+      errcode = LUA_ERRRUN;
+      setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+    }
+    cf = err_unwind(L, cf, errcode);
+    if ((state & _US_FORCE_UNWIND) || cf == NULL) break;
+    _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext);
+    _Unwind_SetGR(ctx, 0, (uint32_t)ucb);
+    _Unwind_SetGR(ctx, 1, (uint32_t)errcode);
+    _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ?
+			    (uint32_t)lj_vm_unwind_ff_eh :
+			    (uint32_t)lj_vm_unwind_c_eh);
     return _URC_INSTALL_CONTEXT;
     return _URC_INSTALL_CONTEXT;
+  default:
+    return _URC_FAILURE;
   }
   }
   if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
   if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
     return _URC_FAILURE;
     return _URC_FAILURE;
   return _URC_CONTINUE_UNWIND;
   return _URC_CONTINUE_UNWIND;
 }
 }
 
 
+#if LJ_UNWIND_EXT
+static __thread _Unwind_Control_Block static_uex;
+
+static void err_raise_ext(int errcode)
+{
+  memset(&static_uex, 0, sizeof(static_uex));
+  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
+  _Unwind_RaiseException(&static_uex);
+}
 #endif
 #endif
 
 
-#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
+#endif /* LJ_TARGET_ARM */
+
+#elif LJ_TARGET_X64 && LJ_ABI_WIN
 
 
 /*
 /*
 ** Someone in Redmond owes me several days of my life. A lot of this is
 ** Someone in Redmond owes me several days of my life. A lot of this is
@@ -386,7 +419,7 @@ typedef struct UndocumentedDispatcherContext {
   ULONG64 EstablisherFrame;
   ULONG64 EstablisherFrame;
   ULONG64 TargetIp;
   ULONG64 TargetIp;
   PCONTEXT ContextRecord;
   PCONTEXT ContextRecord;
-  PEXCEPTION_ROUTINE LanguageHandler;
+  void (*LanguageHandler)(void);
   PVOID HandlerData;
   PVOID HandlerData;
   PUNWIND_HISTORY_TABLE HistoryTable;
   PUNWIND_HISTORY_TABLE HistoryTable;
   ULONG ScopeIndex;
   ULONG ScopeIndex;
@@ -425,7 +458,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
     if (cf2) {  /* We catch it, so start unwinding the upper frames. */
     if (cf2) {  /* We catch it, so start unwinding the upper frames. */
       if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
       if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
 	  rec->ExceptionCode == LJ_GCC_EXCODE) {
 	  rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if LJ_TARGET_WINDOWS
 	__DestructExceptionObject(rec, 1);
 	__DestructExceptionObject(rec, 1);
+#endif
 	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
 	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
       } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
       } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
 	/* Don't catch access violations etc. */
 	/* Don't catch access violations etc. */
@@ -460,7 +495,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
 {
 {
   global_State *g = G(L);
   global_State *g = G(L);
   lj_trace_abort(g);
   lj_trace_abort(g);
-  setgcrefnull(g->jit_L);
+  setmref(g->jit_base, NULL);
   L->status = 0;
   L->status = 0;
 #if LJ_UNWIND_EXT
 #if LJ_UNWIND_EXT
   err_raise_ext(errcode);
   err_raise_ext(errcode);
@@ -505,10 +540,9 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
 /* Find error function for runtime errors. Requires an extra stack traversal. */
 /* Find error function for runtime errors. Requires an extra stack traversal. */
 static ptrdiff_t finderrfunc(lua_State *L)
 static ptrdiff_t finderrfunc(lua_State *L)
 {
 {
-  cTValue *frame = L->base-1, *bot = tvref(L->stack);
+  cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
   void *cf = L->cframe;
   void *cf = L->cframe;
-  while (frame > bot) {
-    lua_assert(cf != NULL);
+  while (frame > bot && cf) {
     while (cframe_nres(cframe_raw(cf)) < 0) {  /* cframe without frame? */
     while (cframe_nres(cframe_raw(cf)) < 0) {  /* cframe without frame? */
       if (frame >= restorestack(L, -cframe_nres(cf)))
       if (frame >= restorestack(L, -cframe_nres(cf)))
 	break;
 	break;
@@ -526,12 +560,12 @@ static ptrdiff_t finderrfunc(lua_State *L)
     case FRAME_C:
     case FRAME_C:
       cf = cframe_prev(cf);
       cf = cframe_prev(cf);
       /* fallthrough */
       /* fallthrough */
+    case FRAME_VARG:
+      frame = frame_prevd(frame);
+      break;
     case FRAME_CONT:
     case FRAME_CONT:
-#if LJ_HASFFI
-      if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+      if (frame_iscont_fficb(frame))
 	cf = cframe_prev(cf);
 	cf = cframe_prev(cf);
-#endif
-    case FRAME_VARG:
       frame = frame_prevd(frame);
       frame = frame_prevd(frame);
       break;
       break;
     case FRAME_CP:
     case FRAME_CP:
@@ -542,8 +576,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
       break;
       break;
     case FRAME_PCALL:
     case FRAME_PCALL:
     case FRAME_PCALLH:
     case FRAME_PCALLH:
-      if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue)))  /* xpcall? */
-	return savestack(L, frame-1);  /* Point to xpcall's errorfunc. */
+      if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
+	return savestack(L, frame_prevd(frame)+1);  /* xpcall's errorfunc. */
       return 0;
       return 0;
     default:
     default:
       lua_assert(0);
       lua_assert(0);
@@ -566,8 +600,9 @@ LJ_NOINLINE void lj_err_run(lua_State *L)
       lj_err_throw(L, LUA_ERRERR);
       lj_err_throw(L, LUA_ERRERR);
     }
     }
     L->status = LUA_ERRERR;
     L->status = LUA_ERRERR;
-    copyTV(L, top, top-1);
+    copyTV(L, top+LJ_FR2, top-1);
     copyTV(L, top-1, errfunc);
     copyTV(L, top-1, errfunc);
+    if (LJ_FR2) setnilV(top++);
     L->top = top+1;
     L->top = top+1;
     lj_vm_call(L, top, 1+1);  /* Stack: |errfunc|msg| -> |msg| */
     lj_vm_call(L, top, 1+1);  /* Stack: |errfunc|msg| -> |msg| */
   }
   }
@@ -581,7 +616,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
   va_list argp;
   va_list argp;
   va_start(argp, em);
   va_start(argp, em);
   if (curr_funcisL(L)) L->top = curr_topL(L);
   if (curr_funcisL(L)) L->top = curr_topL(L);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   va_end(argp);
   lj_debug_addloc(L, msg, L->base-1, NULL);
   lj_debug_addloc(L, msg, L->base-1, NULL);
   lj_err_run(L);
   lj_err_run(L);
@@ -599,11 +634,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
 {
 {
   char buff[LUA_IDSIZE];
   char buff[LUA_IDSIZE];
   const char *msg;
   const char *msg;
-  lj_debug_shortname(buff, src);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
-  msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
+  lj_debug_shortname(buff, src, line);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
   if (tok)
   if (tok)
-    lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
+    lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
   lj_err_throw(L, LUA_ERRSYNTAX);
   lj_err_throw(L, LUA_ERRSYNTAX);
 }
 }
 
 
@@ -642,8 +677,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
   const BCIns *pc = cframe_Lpc(L);
   const BCIns *pc = cframe_Lpc(L);
   if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
   if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
     const char *tname = lj_typename(o);
     const char *tname = lj_typename(o);
+    if (LJ_FR2) o++;
     setframe_pc(o, pc);
     setframe_pc(o, pc);
-    setframe_gc(o, obj2gco(L));
+    setframe_gc(o, obj2gco(L), LJ_TTHREAD);
     L->top = L->base = o+1;
     L->top = L->base = o+1;
     err_msgv(L, LJ_ERR_BADCALL, tname);
     err_msgv(L, LJ_ERR_BADCALL, tname);
   }
   }
@@ -658,13 +694,10 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
   if (frame_islua(frame)) {
   if (frame_islua(frame)) {
     pframe = frame_prevl(frame);
     pframe = frame_prevl(frame);
   } else if (frame_iscont(frame)) {
   } else if (frame_iscont(frame)) {
-#if LJ_HASFFI
-    if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
+    if (frame_iscont_fficb(frame)) {
       pframe = frame;
       pframe = frame;
       frame = NULL;
       frame = NULL;
-    } else
-#endif
-    {
+    } else {
       pframe = frame_prevd(frame);
       pframe = frame_prevd(frame);
 #if LJ_HASFFI
 #if LJ_HASFFI
       /* Remove frame for FFI metamethods. */
       /* Remove frame for FFI metamethods. */
@@ -687,7 +720,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
   const char *msg;
   const char *msg;
   va_list argp;
   va_list argp;
   va_start(argp, em);
   va_start(argp, em);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   va_end(argp);
   lj_err_callermsg(L, msg);
   lj_err_callermsg(L, msg);
 }
 }
@@ -707,9 +740,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
   if (narg < 0 && narg > LUA_REGISTRYINDEX)
   if (narg < 0 && narg > LUA_REGISTRYINDEX)
     narg = (int)(L->top - L->base) + narg + 1;
     narg = (int)(L->top - L->base) + narg + 1;
   if (ftype && ftype[3] == 'h' && --narg == 0)  /* Check for "method". */
   if (ftype && ftype[3] == 'h' && --narg == 0)  /* Check for "method". */
-    msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
+    msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
   else
   else
-    msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
+    msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
   lj_err_callermsg(L, msg);
   lj_err_callermsg(L, msg);
 }
 }
 
 
@@ -719,7 +752,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
   const char *msg;
   const char *msg;
   va_list argp;
   va_list argp;
   va_start(argp, em);
   va_start(argp, em);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   va_end(argp);
   err_argmsg(L, narg, msg);
   err_argmsg(L, narg, msg);
 }
 }
@@ -733,9 +766,23 @@ LJ_NOINLINE void lj_err_arg(lua_State *L, int narg, ErrMsg em)
 /* Typecheck error for arguments. */
 /* Typecheck error for arguments. */
 LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
 LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
 {
 {
-  TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
-  const char *tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
-  const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
+  const char *tname, *msg;
+  if (narg <= LUA_REGISTRYINDEX) {
+    if (narg >= LUA_GLOBALSINDEX) {
+      tname = lj_obj_itypename[~LJ_TTAB];
+    } else {
+      GCfunc *fn = curr_func(L);
+      int idx = LUA_GLOBALSINDEX - narg;
+      if (idx <= fn->c.nupvalues)
+	tname = lj_typename(&fn->c.upvalue[idx-1]);
+      else
+	tname = lj_obj_typename[0];
+    }
+  } else {
+    TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
+    tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
+  }
+  msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
   err_argmsg(L, narg, msg);
   err_argmsg(L, narg, msg);
 }
 }
 
 
@@ -785,7 +832,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
   const char *msg;
   const char *msg;
   va_list argp;
   va_list argp;
   va_start(argp, fmt);
   va_start(argp, fmt);
-  msg = lj_str_pushvf(L, fmt, argp);
+  msg = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   va_end(argp);
   lj_err_callermsg(L, msg);
   lj_err_callermsg(L, msg);
   return 0;  /* unreachable */
   return 0;  /* unreachable */

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_err.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Error handling.
 ** Error handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_ERR_H
 #ifndef _LJ_ERR_H

+ 2 - 5
Source/ThirdParty/LuaJIT/src/lj_errmsg.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** VM error messages.
 ** VM error messages.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 /* This file may be included multiple times with different ERRDEF macros. */
 /* This file may be included multiple times with different ERRDEF macros. */
@@ -96,9 +96,7 @@ ERRDEF(STRPATX,	"pattern too complex")
 ERRDEF(STRCAPI,	"invalid capture index")
 ERRDEF(STRCAPI,	"invalid capture index")
 ERRDEF(STRCAPN,	"too many captures")
 ERRDEF(STRCAPN,	"too many captures")
 ERRDEF(STRCAPU,	"unfinished capture")
 ERRDEF(STRCAPU,	"unfinished capture")
-ERRDEF(STRFMTO,	"invalid option " LUA_QL("%%%c") " to " LUA_QL("format"))
-ERRDEF(STRFMTR,	"invalid format (repeated flags)")
-ERRDEF(STRFMTW,	"invalid format (width or precision too long)")
+ERRDEF(STRFMT,	"invalid option " LUA_QS " to " LUA_QL("format"))
 ERRDEF(STRGSRV,	"invalid replacement value (a %s)")
 ERRDEF(STRGSRV,	"invalid replacement value (a %s)")
 ERRDEF(BADMODN,	"name conflict for module " LUA_QS)
 ERRDEF(BADMODN,	"name conflict for module " LUA_QS)
 #if LJ_HASJIT
 #if LJ_HASJIT
@@ -118,7 +116,6 @@ ERRDEF(JITOPT,	"unknown or malformed optimization flag " LUA_QS)
 /* Lexer/parser errors. */
 /* Lexer/parser errors. */
 ERRDEF(XMODE,	"attempt to load chunk with wrong mode")
 ERRDEF(XMODE,	"attempt to load chunk with wrong mode")
 ERRDEF(XNEAR,	"%s near " LUA_QS)
 ERRDEF(XNEAR,	"%s near " LUA_QS)
-ERRDEF(XELEM,	"lexical element too long")
 ERRDEF(XLINES,	"chunk has too many lines")
 ERRDEF(XLINES,	"chunk has too many lines")
 ERRDEF(XLEVELS,	"chunk has too many syntax levels")
 ERRDEF(XLEVELS,	"chunk has too many syntax levels")
 ERRDEF(XNUMBER,	"malformed number")
 ERRDEF(XNUMBER,	"malformed number")

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_ff.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Fast function IDs.
 ** Fast function IDs.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_FF_H
 #ifndef _LJ_FF_H

+ 449 - 120
Source/ThirdParty/LuaJIT/src/lj_ffrecord.c

@@ -1,6 +1,6 @@
 /*
 /*
 ** Fast function call recorder.
 ** Fast function call recorder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #define lj_ffrecord_c
 #define lj_ffrecord_c
@@ -27,6 +27,7 @@
 #include "lj_dispatch.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 
 /* Some local macros to save typing. Undef'd at the end. */
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
     GCstr *s;
     GCstr *s;
     if (!tvisnumber(o))
     if (!tvisnumber(o))
       lj_trace_err(J, LJ_TRERR_BADTYPE);
       lj_trace_err(J, LJ_TRERR_BADTYPE);
-    if (tvisint(o))
-      s = lj_str_fromint(J->L, intV(o));
-    else
-      s = lj_str_fromnum(J->L, &o->n);
+    s = lj_strfmt_number(J->L, o);
     setstrV(J->L, o, s);
     setstrV(J->L, o, s);
     return s;
     return s;
   }
   }
@@ -98,27 +96,91 @@ static ptrdiff_t results_wanted(jit_State *J)
     return -1;
     return -1;
 }
 }
 
 
-/* Throw error for unsupported variant of fast function. */
-LJ_NORET static void recff_nyiu(jit_State *J)
+/* Trace stitching: add continuation below frame to start a new trace. */
+static void recff_stitch(jit_State *J)
 {
 {
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFFU);
+  ASMFunction cont = lj_cont_stitch;
+  lua_State *L = J->L;
+  TValue *base = L->base;
+  const BCIns *pc = frame_pc(base-1);
+  TValue *pframe = frame_prevl(base-1);
+  TRef trcont;
+
+  lua_assert(!LJ_FR2);  /* TODO_FR2: handle frame shift. */
+  /* Move func + args up in Lua stack and insert continuation. */
+  memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
+  setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
+  setcont(base, cont);
+  setframe_pc(base, pc);
+  setnilV(base-1);  /* Incorrect, but rec_check_slots() won't run anymore. */
+  L->base += 2;
+  L->top += 2;
+
+  /* Ditto for the IR. */
+  memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
+#if LJ_64
+  trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
+#else
+  trcont = lj_ir_kptr(J, (void *)cont);
+#endif
+  J->base[0] = trcont | TREF_CONT;
+  J->ktracep = lj_ir_k64_reserve(J);
+  lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
+  J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
+  J->base += 2;
+  J->baseslot += 2;
+  J->framedepth++;
+
+  lj_record_stop(J, LJ_TRLINK_STITCH, 0);
+
+  /* Undo Lua stack changes. */
+  memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
+  setframe_pc(base-1, pc);
+  L->base -= 2;
+  L->top -= 2;
 }
 }
 
 
-/* Fallback handler for all fast functions that are not recorded (yet). */
+/* Fallback handler for fast functions that are not recorded (yet). */
 static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
 {
 {
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFF);
-  UNUSED(rd);
+  if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
+    lj_trace_err_info(J, LJ_TRERR_TRACEUV);
+  } else {
+    /* Can only stitch from Lua call. */
+    if (J->framedepth && frame_islua(J->L->base-1)) {
+      BCOp op = bc_op(*frame_pc(J->L->base-1));
+      /* Stitched trace cannot start with *M op with variable # of args. */
+      if (!(op == BC_CALLM || op == BC_CALLMT ||
+	    op == BC_RETM || op == BC_TSETM)) {
+	switch (J->fn->c.ffid) {
+	case FF_error:
+	case FF_debug_sethook:
+	case FF_jit_flush:
+	  break;  /* Don't stitch across special builtins. */
+	default:
+	  recff_stitch(J);  /* Use trace stitching. */
+	  rd->nres = -1;
+	  return;
+	}
+      }
+    }
+    /* Otherwise stop trace and return to interpreter. */
+    lj_record_stop(J, LJ_TRLINK_RETURN, 0);
+    rd->nres = -1;
+  }
 }
 }
 
 
-/* C functions can have arbitrary side-effects and are not recorded (yet). */
-static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
+/* Fallback handler for unsupported variants of fast functions. */
+#define recff_nyiu	recff_nyi
+
+/* Must stop the trace for classic C functions with arbitrary side-effects. */
+#define recff_c		recff_nyi
+
+/* Emit BUFHDR for the global temporary buffer. */
+static TRef recff_bufhdr(jit_State *J)
 {
 {
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYICF);
-  UNUSED(rd);
+  return emitir(IRT(IR_BUFHDR, IRT_P32),
+		lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
 }
 }
 
 
 /* -- Base library fast functions ----------------------------------------- */
 /* -- Base library fast functions ----------------------------------------- */
@@ -135,7 +197,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
   uint32_t t;
   uint32_t t;
   if (tvisnumber(&rd->argv[0]))
   if (tvisnumber(&rd->argv[0]))
     t = ~LJ_TNUMX;
     t = ~LJ_TNUMX;
-  else if (LJ_64 && tvislightud(&rd->argv[0]))
+  else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
     t = ~LJ_TLIGHTUD;
     t = ~LJ_TLIGHTUD;
   else
   else
     t = ~itype(&rd->argv[0]);
     t = ~itype(&rd->argv[0]);
@@ -263,7 +325,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
 	  J->base[i] = J->base[start+i];
 	  J->base[i] = J->base[start+i];
       }  /* else: Interpreter will throw. */
       }  /* else: Interpreter will throw. */
     } else {
     } else {
-      recff_nyiu(J);
+      recff_nyiu(J, rd);
+      return;
     }
     }
   }  /* else: Interpreter will throw. */
   }  /* else: Interpreter will throw. */
 }
 }
@@ -274,14 +337,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
   TRef base = J->base[1];
   TRef base = J->base[1];
   if (tr && !tref_isnil(base)) {
   if (tr && !tref_isnil(base)) {
     base = lj_opt_narrow_toint(J, base);
     base = lj_opt_narrow_toint(J, base);
-    if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
-      recff_nyiu(J);
+    if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
+      recff_nyiu(J, rd);
+      return;
+    }
   }
   }
   if (tref_isnumber_str(tr)) {
   if (tref_isnumber_str(tr)) {
     if (tref_isstr(tr)) {
     if (tref_isstr(tr)) {
       TValue tmp;
       TValue tmp;
-      if (!lj_strscan_num(strV(&rd->argv[0]), &tmp))
-	recff_nyiu(J);  /* Would need an inverted STRTO for this case. */
+      if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
+	recff_nyiu(J, rd);  /* Would need an inverted STRTO for this case. */
+	return;
+      }
       tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
       tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
     }
     }
 #if LJ_HASFFI
 #if LJ_HASFFI
@@ -336,13 +403,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
   if (tref_isstr(tr)) {
   if (tref_isstr(tr)) {
     /* Ignore __tostring in the string base metatable. */
     /* Ignore __tostring in the string base metatable. */
     /* Pass on result in J->base[0]. */
     /* Pass on result in J->base[0]. */
-  } else if (!recff_metacall(J, rd, MM_tostring)) {
+  } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
     if (tref_isnumber(tr)) {
     if (tref_isnumber(tr)) {
-      J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+      J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
+			  tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
     } else if (tref_ispri(tr)) {
     } else if (tref_ispri(tr)) {
-      J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)]));
+      J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
     } else {
     } else {
-      recff_nyiu(J);
+      recff_nyiu(J, rd);
+      return;
     }
     }
   }
   }
 }
 }
@@ -364,14 +433,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
   }  /* else: Interpreter will throw. */
   }  /* else: Interpreter will throw. */
 }
 }
 
 
-static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
 {
 {
-  if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
-    TRef tab = J->base[0];
-    if (tref_istab(tab)) {
+  TRef tr = J->base[0];
+  if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) &&
+	recff_metacall(J, rd, MM_pairs + rd->data))) {
+    if (tref_istab(tr)) {
       J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
       J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
-      J->base[1] = tab;
-      J->base[2] = lj_ir_kint(J, 0);
+      J->base[1] = tr;
+      J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
       rd->nres = 3;
       rd->nres = 3;
     }  /* else: Interpreter will throw. */
     }  /* else: Interpreter will throw. */
   }
   }
@@ -399,6 +469,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
     TValue argv0, argv1;
     TValue argv0, argv1;
     TRef tmp;
     TRef tmp;
     int errcode;
     int errcode;
+    lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
     /* Swap function and traceback. */
     /* Swap function and traceback. */
     tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
     tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
     copyTV(J->L, &argv0, &rd->argv[0]);
     copyTV(J->L, &argv0, &rd->argv[0]);
@@ -416,6 +487,18 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
   }  /* else: Interpreter will throw. */
   }  /* else: Interpreter will throw. */
 }
 }
 
 
+static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+  /* Only support getfenv(0) for now. */
+  if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
+    TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
+    J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
+    return;
+  }
+  recff_nyiu(J, rd);
+}
+
 /* -- Math library fast functions ----------------------------------------- */
 /* -- Math library fast functions ----------------------------------------- */
 
 
 static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
@@ -528,14 +611,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
   rd->nres = 2;
   rd->nres = 2;
 }
 }
 
 
-static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
-{
-  TRef tr = lj_ir_tonum(J, J->base[0]);
-  TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
-  J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
-  UNUSED(rd);
-}
-
 static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
 {
 {
   TRef tr = lj_ir_tonum(J, J->base[0]);
   TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -592,48 +667,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
 
 
 /* -- Bit library fast functions ------------------------------------------ */
 /* -- Bit library fast functions ------------------------------------------ */
 
 
-/* Record unary bit.tobit, bit.bnot, bit.bswap. */
+/* Record bit.tobit. */
+static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+#if LJ_HASFFI
+  if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
+#endif
+  J->base[0] = lj_opt_narrow_tobit(J, tr);
+  UNUSED(rd);
+}
+
+/* Record unary bit.bnot, bit.bswap. */
 static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
 {
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
+#if LJ_HASFFI
+  if (recff_bit64_unary(J, rd))
+    return;
+#endif
+  J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
 }
 }
 
 
 /* Record N-ary bit.band, bit.bor, bit.bxor. */
 /* Record N-ary bit.band, bit.bor, bit.bxor. */
 static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
 {
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  uint32_t op = rd->data;
-  BCReg i;
-  for (i = 1; J->base[i] != 0; i++)
-    tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i]));
-  J->base[0] = tr;
+#if LJ_HASFFI
+  if (recff_bit64_nary(J, rd))
+    return;
+#endif
+  {
+    TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+    uint32_t ot = IRTI(rd->data);
+    BCReg i;
+    for (i = 1; J->base[i] != 0; i++)
+      tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
+    J->base[0] = tr;
+  }
 }
 }
 
 
 /* Record bit shifts. */
 /* Record bit shifts. */
 static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
 {
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
-  IROp op = (IROp)rd->data;
-  if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
-      !tref_isk(tsh))
-    tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
+#if LJ_HASFFI
+  if (recff_bit64_shift(J, rd))
+    return;
+#endif
+  {
+    TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+    TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
+    IROp op = (IROp)rd->data;
+    if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+	!tref_isk(tsh))
+      tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
 #ifdef LJ_TARGET_UNIFYROT
 #ifdef LJ_TARGET_UNIFYROT
-  if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
-    op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
-    tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+    if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+      op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+      tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+    }
+#endif
+    J->base[0] = emitir(IRTI(op), tr, tsh);
   }
   }
+}
+
+static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
+{
+#if LJ_HASFFI
+  TRef hdr = recff_bufhdr(J);
+  TRef tr = recff_bit64_tohex(J, rd, hdr);
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+#else
+  recff_nyiu(J, rd);  /* Don't bother working around this NYI. */
 #endif
 #endif
-  J->base[0] = emitir(IRTI(op), tr, tsh);
 }
 }
 
 
 /* -- String library fast functions --------------------------------------- */
 /* -- String library fast functions --------------------------------------- */
 
 
-static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd)
+/* Specialize to relative starting position for string. */
+static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
+			       TRef trlen, TRef tr0)
 {
 {
-  J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN);
-  UNUSED(rd);
+  int32_t start = *st;
+  if (start < 0) {
+    emitir(IRTGI(IR_LT), tr, tr0);
+    tr = emitir(IRTI(IR_ADD), trlen, tr);
+    start = start + (int32_t)s->len;
+    emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
+    if (start < 0) {
+      tr = tr0;
+      start = 0;
+    }
+  } else if (start == 0) {
+    emitir(IRTGI(IR_EQ), tr, tr0);
+    tr = tr0;
+  } else {
+    tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
+    emitir(IRTGI(IR_GE), tr, tr0);
+    start--;
+  }
+  *st = start;
+  return tr;
 }
 }
 
 
 /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
 /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -680,29 +812,11 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
   } else if ((MSize)end <= str->len) {
   } else if ((MSize)end <= str->len) {
     emitir(IRTGI(IR_ULE), trend, trlen);
     emitir(IRTGI(IR_ULE), trend, trlen);
   } else {
   } else {
-    emitir(IRTGI(IR_GT), trend, trlen);
+    emitir(IRTGI(IR_UGT), trend, trlen);
     end = (int32_t)str->len;
     end = (int32_t)str->len;
     trend = trlen;
     trend = trlen;
   }
   }
-  if (start < 0) {
-    emitir(IRTGI(IR_LT), trstart, tr0);
-    trstart = emitir(IRTI(IR_ADD), trlen, trstart);
-    start = start+(int32_t)str->len;
-    emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
-    if (start < 0) {
-      trstart = tr0;
-      start = 0;
-    }
-  } else {
-    if (start == 0) {
-      emitir(IRTGI(IR_EQ), trstart, tr0);
-      trstart = tr0;
-    } else {
-      trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
-      emitir(IRTGI(IR_GE), trstart, tr0);
-      start--;
-    }
-  }
+  trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
   if (rd->data) {  /* Return string.sub result. */
   if (rd->data) {  /* Return string.sub result. */
     if (end - start >= 0) {
     if (end - start >= 0) {
       /* Also handle empty range here, to avoid extra traces. */
       /* Also handle empty range here, to avoid extra traces. */
@@ -712,7 +826,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
       J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
       J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
     } else {  /* Range underflow: return empty string. */
     } else {  /* Range underflow: return empty string. */
       emitir(IRTGI(IR_LT), trend, trstart);
       emitir(IRTGI(IR_LT), trend, trstart);
-      J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0));
+      J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
     }
     }
   } else {  /* Return string.byte result(s). */
   } else {  /* Return string.byte result(s). */
     ptrdiff_t i, len = end - start;
     ptrdiff_t i, len = end - start;
@@ -734,48 +848,200 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
   }
   }
 }
 }
 
 
-/* -- Table library fast functions ---------------------------------------- */
-
-static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
 {
 {
-  if (tref_istab(J->base[0]))
-    J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]);
-  /* else: Interpreter will throw. */
+  TRef k255 = lj_ir_kint(J, 255);
+  BCReg i;
+  for (i = 0; J->base[i] != 0; i++) {  /* Convert char values to strings. */
+    TRef tr = lj_opt_narrow_toint(J, J->base[i]);
+    emitir(IRTGI(IR_ULE), tr, k255);
+    J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
+  }
+  if (i > 1) {  /* Concatenate the strings, if there's more than one. */
+    TRef hdr = recff_bufhdr(J), tr = hdr;
+    for (i = 0; J->base[i] != 0; i++)
+      tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
+    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  }
   UNUSED(rd);
   UNUSED(rd);
 }
 }
 
 
-static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
 {
 {
-  TRef tab = J->base[0];
-  rd->nres = 0;
-  if (tref_istab(tab)) {
-    if (tref_isnil(J->base[1])) {  /* Simple pop: t[#t] = nil */
-      TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab);
-      GCtab *t = tabV(&rd->argv[0]);
-      MSize len = lj_tab_len(t);
-      emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
-      if (len) {
-	RecordIndex ix;
-	ix.tab = tab;
-	ix.key = trlen;
-	settabV(J->L, &ix.tabv, t);
-	setintV(&ix.keyv, len);
-	ix.idxchain = 0;
-	if (results_wanted(J) != 0) {  /* Specialize load only if needed. */
-	  ix.val = 0;
-	  J->base[0] = lj_record_idx(J, &ix);  /* Load previous value. */
-	  rd->nres = 1;
-	  /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */
-	}
-	ix.val = TREF_NIL;
-	lj_record_idx(J, &ix);  /* Remove value. */
+  TRef str = lj_ir_tostr(J, J->base[0]);
+  TRef rep = lj_opt_narrow_toint(J, J->base[1]);
+  TRef hdr, tr, str2 = 0;
+  if (!tref_isnil(J->base[2])) {
+    TRef sep = lj_ir_tostr(J, J->base[2]);
+    int32_t vrep = argv2int(J, &rd->argv[1]);
+    emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
+    if (vrep > 1) {
+      TRef hdr2 = recff_bufhdr(J);
+      TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
+      tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
+      str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
+    }
+  }
+  tr = hdr = recff_bufhdr(J);
+  if (str2) {
+    tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
+    str = str2;
+    rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
+  }
+  tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+}
+
+static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
+{
+  TRef str = lj_ir_tostr(J, J->base[0]);
+  TRef hdr = recff_bufhdr(J);
+  TRef tr = lj_ir_call(J, rd->data, hdr, str);
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+}
+
+static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
+{
+  TRef trstr = lj_ir_tostr(J, J->base[0]);
+  TRef trpat = lj_ir_tostr(J, J->base[1]);
+  TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
+  TRef tr0 = lj_ir_kint(J, 0);
+  TRef trstart;
+  GCstr *str = argv2str(J, &rd->argv[0]);
+  GCstr *pat = argv2str(J, &rd->argv[1]);
+  int32_t start;
+  J->needsnap = 1;
+  if (tref_isnil(J->base[2])) {
+    trstart = lj_ir_kint(J, 1);
+    start = 1;
+  } else {
+    trstart = lj_opt_narrow_toint(J, J->base[2]);
+    start = argv2int(J, &rd->argv[2]);
+  }
+  trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
+  if ((MSize)start <= str->len) {
+    emitir(IRTGI(IR_ULE), trstart, trlen);
+  } else {
+    emitir(IRTGI(IR_UGT), trstart, trlen);
+#if LJ_52
+    J->base[0] = TREF_NIL;
+    return;
+#else
+    trstart = trlen;
+    start = str->len;
+#endif
+  }
+  /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
+  if ((J->base[2] && tref_istruecond(J->base[3])) ||
+      (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
+       !lj_str_haspattern(pat))) {  /* Search for fixed string. */
+    TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
+    TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
+    TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
+    TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
+    TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
+    TRef trp0 = lj_ir_kkptr(J, NULL);
+    if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
+		    str->len-(MSize)start, pat->len)) {
+      TRef pos;
+      emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
+      pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
+      J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
+      J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
+      rd->nres = 2;
+    } else {
+      emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
+      J->base[0] = TREF_NIL;
+    }
+  } else {  /* Search for pattern. */
+    recff_nyiu(J, rd);
+    return;
+  }
+}
+
+static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+{
+  TRef trfmt = lj_ir_tostr(J, J->base[0]);
+  GCstr *fmt = argv2str(J, &rd->argv[0]);
+  int arg = 1;
+  TRef hdr, tr;
+  FormatState fs;
+  SFormat sf;
+  /* Specialize to the format string. */
+  emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
+  tr = hdr = recff_bufhdr(J);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {  /* Parse format. */
+    TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
+    TRef trsf = lj_ir_kint(J, (int32_t)sf);
+    IRCallID id;
+    switch (STRFMT_TYPE(sf)) {
+    case STRFMT_LIT:
+      tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
+		  lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
+      break;
+    case STRFMT_INT:
+      id = IRCALL_lj_strfmt_putfnum_int;
+    handle_int:
+      if (!tref_isinteger(tra))
+	goto handle_num;
+      if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
+	tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
+		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
+      } else {
+#if LJ_HASFFI
+	tra = emitir(IRT(IR_CONV, IRT_U64), tra,
+		     (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
+	lj_needsplit(J);
+#else
+	recff_nyiu(J, rd);  /* Don't bother working around this NYI. */
+	return;
+#endif
       }
       }
-    } else {  /* Complex case: remove in the middle. */
-      recff_nyiu(J);
+      break;
+    case STRFMT_UINT:
+      id = IRCALL_lj_strfmt_putfnum_uint;
+      goto handle_int;
+    case STRFMT_NUM:
+      id = IRCALL_lj_strfmt_putfnum;
+    handle_num:
+      tra = lj_ir_tonum(J, tra);
+      tr = lj_ir_call(J, id, tr, trsf, tra);
+      if (LJ_SOFTFP) lj_needsplit(J);
+      break;
+    case STRFMT_STR:
+      if (!tref_isstr(tra)) {
+	recff_nyiu(J, rd);  /* NYI: __tostring and non-string types for %s. */
+	return;
+      }
+      if (sf == STRFMT_STR)  /* Shortcut for plain %s. */
+	tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
+      else if ((sf & STRFMT_T_QUOTED))
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
+      else
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
+      break;
+    case STRFMT_CHAR:
+      tra = lj_opt_narrow_toint(J, tra);
+      if (sf == STRFMT_CHAR)  /* Shortcut for plain %c. */
+	tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
+		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
+      else
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
+      break;
+    case STRFMT_PTR:  /* NYI */
+    case STRFMT_ERR:
+    default:
+      recff_nyiu(J, rd);
+      return;
     }
     }
-  }  /* else: Interpreter will throw. */
+  }
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
 }
 }
 
 
+/* -- Table library fast functions ---------------------------------------- */
+
 static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
 {
 {
   RecordIndex ix;
   RecordIndex ix;
@@ -792,11 +1058,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
       ix.idxchain = 0;
       ix.idxchain = 0;
       lj_record_idx(J, &ix);  /* Set new value. */
       lj_record_idx(J, &ix);  /* Set new value. */
     } else {  /* Complex case: insert in the middle. */
     } else {  /* Complex case: insert in the middle. */
-      recff_nyiu(J);
+      recff_nyiu(J, rd);
+      return;
     }
     }
   }  /* else: Interpreter will throw. */
   }  /* else: Interpreter will throw. */
 }
 }
 
 
+static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
+{
+  TRef tab = J->base[0];
+  if (tref_istab(tab)) {
+    TRef sep = !tref_isnil(J->base[1]) ?
+	       lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
+    TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
+	       lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
+    TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
+	       lj_opt_narrow_toint(J, J->base[3]) :
+	       lj_ir_call(J, IRCALL_lj_tab_len, tab);
+    TRef hdr = recff_bufhdr(J);
+    TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
+    emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
+    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  }  /* else: Interpreter will throw. */
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
+{
+  TRef tra = lj_opt_narrow_toint(J, J->base[0]);
+  TRef trh = lj_opt_narrow_toint(J, J->base[1]);
+  J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+  if (tref_istab(tr)) {
+    rd->nres = 0;
+    lj_ir_call(J, IRCALL_lj_tab_clear, tr);
+    J->needsnap = 1;
+  }  /* else: Interpreter will throw. */
+}
+
 /* -- I/O library fast functions ------------------------------------------ */
 /* -- I/O library fast functions ------------------------------------------ */
 
 
 /* Get FILE* for I/O function. Any I/O error aborts recording, so there's
 /* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -832,7 +1136,10 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
     TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
     TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
     TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
     TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
     if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
     if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
-      TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
+      IRIns *irs = IR(tref_ref(str));
+      TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
+		irs->op1 :
+		emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
       tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
       tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
       if (results_wanted(J) != 0)  /* Check result only if not ignored. */
       if (results_wanted(J) != 0)  /* Check result only if not ignored. */
 	emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
 	emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -854,6 +1161,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
   J->base[0] = TREF_TRUE;
   J->base[0] = TREF_TRUE;
 }
 }
 
 
+/* -- Debug library fast functions ---------------------------------------- */
+
+static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
+{
+  GCtab *mt;
+  TRef mtref;
+  TRef tr = J->base[0];
+  if (tref_istab(tr)) {
+    mt = tabref(tabV(&rd->argv[0])->metatable);
+    mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
+  } else if (tref_isudata(tr)) {
+    mt = tabref(udataV(&rd->argv[0])->metatable);
+    mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
+  } else {
+    mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
+    J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
+    return;
+  }
+  emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
+  J->base[0] = mt ? mtref : TREF_NIL;
+}
+
 /* -- Record calls to fast functions -------------------------------------- */
 /* -- Record calls to fast functions -------------------------------------- */
 
 
 #include "lj_recdef.h"
 #include "lj_recdef.h"

+ 1 - 1
Source/ThirdParty/LuaJIT/src/lj_ffrecord.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Fast function call recorder.
 ** Fast function call recorder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_FFRECORD_H
 #ifndef _LJ_FFRECORD_H

+ 110 - 23
Source/ThirdParty/LuaJIT/src/lj_frame.h

@@ -1,6 +1,6 @@
 /*
 /*
 ** Stack frames.
 ** Stack frames.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */
 */
 
 
 #ifndef _LJ_FRAME_H
 #ifndef _LJ_FRAME_H
@@ -11,7 +11,16 @@
 
 
 /* -- Lua stack frame ----------------------------------------------------- */
 /* -- Lua stack frame ----------------------------------------------------- */
 
 
-/* Frame type markers in callee function slot (callee base-1). */
+/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned:
+**
+**    PC  00  Lua frame
+** delta 001  C frame
+** delta 010  Continuation frame
+** delta 011  Lua vararg frame
+** delta 101  cpcall() frame
+** delta 110  ff pcall() frame
+** delta 111  ff pcall() frame with active hook
+*/
 enum {
 enum {
   FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
   FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
   FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
   FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
 #define FRAME_TYPEP		(FRAME_TYPE|FRAME_P)
 #define FRAME_TYPEP		(FRAME_TYPE|FRAME_P)
 
 
 /* Macros to access and modify Lua frames. */
 /* Macros to access and modify Lua frames. */
+#if LJ_FR2
+/* Two-slot frame info, required for 64 bit PC/GCRef:
+**
+**                   base-2  base-1      |  base  base+1 ...
+**                  [func   PC/delta/ft] | [slots ...]
+**                  ^-- frame            | ^-- base   ^-- top
+**
+** Continuation frames:
+**
+**   base-4  base-3  base-2  base-1      |  base  base+1 ...
+**  [cont      PC ] [func   PC/delta/ft] | [slots ...]
+**                  ^-- frame            | ^-- base   ^-- top
+*/
+#define frame_gc(f)		(gcval((f)-1))
+#define frame_ftsz(f)		((ptrdiff_t)(f)->ftsz)
+#define frame_pc(f)		((const BCIns *)frame_ftsz(f))
+#define setframe_gc(f, p, tp)	(setgcVraw((f)-1, (p), (tp)))
+#define setframe_ftsz(f, sz)	((f)->ftsz = (sz))
+#define setframe_pc(f, pc)	((f)->ftsz = (int64_t)(intptr_t)(pc))
+#else
+/* One-slot frame info, sufficient for 32 bit PC/GCRef:
+**
+**              base-1              |  base  base+1 ...
+**              lo     hi           |
+**             [func | PC/delta/ft] | [slots ...]
+**             ^-- frame            | ^-- base   ^-- top
+**
+** Continuation frames:
+**
+**  base-2      base-1              |  base  base+1 ...
+**  lo     hi   lo     hi           |
+** [cont | PC] [func | PC/delta/ft] | [slots ...]
+**             ^-- frame            | ^-- base   ^-- top
+*/
 #define frame_gc(f)		(gcref((f)->fr.func))
 #define frame_gc(f)		(gcref((f)->fr.func))
-#define frame_func(f)		(&frame_gc(f)->fn)
-#define frame_ftsz(f)		((f)->fr.tp.ftsz)
+#define frame_ftsz(f)		((ptrdiff_t)(f)->fr.tp.ftsz)
+#define frame_pc(f)		(mref((f)->fr.tp.pcr, const BCIns))
+#define setframe_gc(f, p, tp)	(setgcref((f)->fr.func, (p)), UNUSED(tp))
+#define setframe_ftsz(f, sz)	((f)->fr.tp.ftsz = (int32_t)(sz))
+#define setframe_pc(f, pc)	(setmref((f)->fr.tp.pcr, (pc)))
+#endif
 
 
 #define frame_type(f)		(frame_ftsz(f) & FRAME_TYPE)
 #define frame_type(f)		(frame_ftsz(f) & FRAME_TYPE)
 #define frame_typep(f)		(frame_ftsz(f) & FRAME_TYPEP)
 #define frame_typep(f)		(frame_ftsz(f) & FRAME_TYPEP)
@@ -33,27 +80,36 @@ enum {
 #define frame_isvarg(f)		(frame_typep(f) == FRAME_VARG)
 #define frame_isvarg(f)		(frame_typep(f) == FRAME_VARG)
 #define frame_ispcall(f)	((frame_ftsz(f) & 6) == FRAME_PCALL)
 #define frame_ispcall(f)	((frame_ftsz(f) & 6) == FRAME_PCALL)
 
 
-#define frame_pc(f)		(mref((f)->fr.tp.pcr, const BCIns))
+#define frame_func(f)		(&frame_gc(f)->fn)
+#define frame_delta(f)		(frame_ftsz(f) >> 3)
+#define frame_sized(f)		(frame_ftsz(f) & ~FRAME_TYPEP)
+
+enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
+
+#if LJ_FR2
+#define frame_contpc(f)		(frame_pc((f)-2))
+#define frame_contv(f)		(((f)-3)->u64)
+#else
 #define frame_contpc(f)		(frame_pc((f)-1))
 #define frame_contpc(f)		(frame_pc((f)-1))
-#if LJ_64
+#define frame_contv(f)		(((f)-1)->u32.lo)
+#endif
+#if LJ_FR2
+#define frame_contf(f)		((ASMFunction)(uintptr_t)((f)-3)->u64)
+#elif LJ_64
 #define frame_contf(f) \
 #define frame_contf(f) \
   ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
   ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
 			 (intptr_t)(int32_t)((f)-1)->u32.lo))
 			 (intptr_t)(int32_t)((f)-1)->u32.lo))
 #else
 #else
 #define frame_contf(f)		((ASMFunction)gcrefp(((f)-1)->gcr, void))
 #define frame_contf(f)		((ASMFunction)gcrefp(((f)-1)->gcr, void))
 #endif
 #endif
-#define frame_delta(f)		(frame_ftsz(f) >> 3)
-#define frame_sized(f)		(frame_ftsz(f) & ~FRAME_TYPEP)
+#define frame_iscont_fficb(f) \
+  (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
 
 
-#define frame_prevl(f)		((f) - (1+bc_a(frame_pc(f)[-1])))
+#define frame_prevl(f)		((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
 #define frame_prevd(f)		((TValue *)((char *)(f) - frame_sized(f)))
 #define frame_prevd(f)		((TValue *)((char *)(f) - frame_sized(f)))
 #define frame_prev(f)		(frame_islua(f)?frame_prevl(f):frame_prevd(f))
 #define frame_prev(f)		(frame_islua(f)?frame_prevl(f):frame_prevd(f))
 /* Note: this macro does not skip over FRAME_VARG. */
 /* Note: this macro does not skip over FRAME_VARG. */
 
 
-#define setframe_pc(f, pc)	(setmref((f)->fr.tp.pcr, (pc)))
-#define setframe_ftsz(f, sz)	((f)->fr.tp.ftsz = (sz))
-#define setframe_gc(f, p)	(setgcref((f)->fr.func, (p)))
-
 /* -- C stack frame ------------------------------------------------------- */
 /* -- C stack frame ------------------------------------------------------- */
 
 
 /* Macros to access and modify the C stack frame chain. */
 /* Macros to access and modify the C stack frame chain. */
@@ -71,22 +127,42 @@ enum {
 #elif LJ_TARGET_X64
 #elif LJ_TARGET_X64
 #if LJ_ABI_WIN
 #if LJ_ABI_WIN
 #define CFRAME_OFS_PREV		(13*8)
 #define CFRAME_OFS_PREV		(13*8)
+#if LJ_GC64
+#define CFRAME_OFS_PC		(12*8)
+#define CFRAME_OFS_L		(11*8)
+#define CFRAME_OFS_ERRF		(21*4)
+#define CFRAME_OFS_NRES		(20*4)
+#define CFRAME_OFS_MULTRES	(8*4)
+#else
 #define CFRAME_OFS_PC		(25*4)
 #define CFRAME_OFS_PC		(25*4)
 #define CFRAME_OFS_L		(24*4)
 #define CFRAME_OFS_L		(24*4)
 #define CFRAME_OFS_ERRF		(23*4)
 #define CFRAME_OFS_ERRF		(23*4)
 #define CFRAME_OFS_NRES		(22*4)
 #define CFRAME_OFS_NRES		(22*4)
 #define CFRAME_OFS_MULTRES	(21*4)
 #define CFRAME_OFS_MULTRES	(21*4)
+#endif
 #define CFRAME_SIZE		(10*8)
 #define CFRAME_SIZE		(10*8)
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
 #define CFRAME_SHIFT_MULTRES	0
 #define CFRAME_SHIFT_MULTRES	0
 #else
 #else
 #define CFRAME_OFS_PREV		(4*8)
 #define CFRAME_OFS_PREV		(4*8)
+#if LJ_GC64
+#define CFRAME_OFS_PC		(3*8)
+#define CFRAME_OFS_L		(2*8)
+#define CFRAME_OFS_ERRF		(3*4)
+#define CFRAME_OFS_NRES		(2*4)
+#define CFRAME_OFS_MULTRES	(0*4)
+#else
 #define CFRAME_OFS_PC		(7*4)
 #define CFRAME_OFS_PC		(7*4)
 #define CFRAME_OFS_L		(6*4)
 #define CFRAME_OFS_L		(6*4)
 #define CFRAME_OFS_ERRF		(5*4)
 #define CFRAME_OFS_ERRF		(5*4)
 #define CFRAME_OFS_NRES		(4*4)
 #define CFRAME_OFS_NRES		(4*4)
 #define CFRAME_OFS_MULTRES	(1*4)
 #define CFRAME_OFS_MULTRES	(1*4)
+#endif
+#if LJ_NO_UNWIND
+#define CFRAME_SIZE		(12*8)
+#else
 #define CFRAME_SIZE		(10*8)
 #define CFRAME_SIZE		(10*8)
+#endif
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 16)
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 16)
 #define CFRAME_SHIFT_MULTRES	0
 #define CFRAME_SHIFT_MULTRES	0
 #endif
 #endif
@@ -103,6 +179,15 @@ enum {
 #define CFRAME_SIZE		64
 #define CFRAME_SIZE		64
 #endif
 #endif
 #define CFRAME_SHIFT_MULTRES	3
 #define CFRAME_SHIFT_MULTRES	3
+#elif LJ_TARGET_ARM64
+#define CFRAME_OFS_ERRF		196
+#define CFRAME_OFS_NRES		200
+#define CFRAME_OFS_PREV		160
+#define CFRAME_OFS_L		176
+#define CFRAME_OFS_PC		168
+#define CFRAME_OFS_MULTRES	192
+#define CFRAME_SIZE		208
+#define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC
 #elif LJ_TARGET_PPC
 #if LJ_TARGET_XBOX360
 #if LJ_TARGET_XBOX360
 #define CFRAME_OFS_ERRF		424
 #define CFRAME_OFS_ERRF		424
@@ -113,7 +198,7 @@ enum {
 #define CFRAME_OFS_MULTRES	408
 #define CFRAME_OFS_MULTRES	408
 #define CFRAME_SIZE		384
 #define CFRAME_SIZE		384
 #define CFRAME_SHIFT_MULTRES	3
 #define CFRAME_SHIFT_MULTRES	3
-#elif LJ_ARCH_PPC64
+#elif LJ_ARCH_PPC32ON64
 #define CFRAME_OFS_ERRF		472
 #define CFRAME_OFS_ERRF		472
 #define CFRAME_OFS_NRES		468
 #define CFRAME_OFS_NRES		468
 #define CFRAME_OFS_PREV		448
 #define CFRAME_OFS_PREV		448
@@ -132,16 +217,8 @@ enum {
 #define CFRAME_SIZE		272
 #define CFRAME_SIZE		272
 #define CFRAME_SHIFT_MULTRES	3
 #define CFRAME_SHIFT_MULTRES	3
 #endif
 #endif
-#elif LJ_TARGET_PPCSPE
-#define CFRAME_OFS_ERRF		28
-#define CFRAME_OFS_NRES		24
-#define CFRAME_OFS_PREV		20
-#define CFRAME_OFS_L		16
-#define CFRAME_OFS_PC		12
-#define CFRAME_OFS_MULTRES	8
-#define CFRAME_SIZE		184
-#define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_MIPS
 #elif LJ_TARGET_MIPS
+#if LJ_ARCH_HASFPU
 #define CFRAME_OFS_ERRF		124
 #define CFRAME_OFS_ERRF		124
 #define CFRAME_OFS_NRES		120
 #define CFRAME_OFS_NRES		120
 #define CFRAME_OFS_PREV		116
 #define CFRAME_OFS_PREV		116
@@ -151,6 +228,16 @@ enum {
 #define CFRAME_SIZE		112
 #define CFRAME_SIZE		112
 #define CFRAME_SHIFT_MULTRES	3
 #define CFRAME_SHIFT_MULTRES	3
 #else
 #else
+#define CFRAME_OFS_ERRF		100
+#define CFRAME_OFS_NRES		96
+#define CFRAME_OFS_PREV		92
+#define CFRAME_OFS_L		88
+#define CFRAME_OFS_PC		44
+#define CFRAME_OFS_MULTRES	16
+#define CFRAME_SIZE		88
+#define CFRAME_SHIFT_MULTRES	3
+#endif
+#else
 #error "Missing CFRAME_* definitions for this architecture"
 #error "Missing CFRAME_* definitions for this architecture"
 #endif
 #endif
 
 

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно