Преглед на файлове

FFI: Add 64 bit bitwise operations.

Mike Pall преди 12 години
родител
ревизия
a98aede377
променени са 12 файла, в които са добавени 495 реда и са изтрити 60 реда
  1. 16 1
      doc/ext_ffi_semantics.html
  2. 3 0
      doc/extensions.html
  3. 5 4
      src/Makefile.dep
  4. 92 6
      src/lib_bit.c
  5. 76 0
      src/lj_carith.c
  6. 10 0
      src/lj_carith.h
  7. 95 0
      src/lj_crecord.c
  8. 6 0
      src/lj_crecord.h
  9. 46 20
      src/lj_ffrecord.c
  10. 6 1
      src/lj_ircall.h
  11. 14 27
      src/lj_opt_fold.c
  12. 126 1
      src/lj_opt_split.c

+ 16 - 1
doc/ext_ffi_semantics.html

@@ -730,6 +730,22 @@ You'll have to explicitly convert a 64 bit integer to a Lua
 number (e.g. for regular floating-point calculations) with
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 
+<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
+arithmetic operators apply analogously.<br>
+
+Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
+converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
+returns a Lua number.<br>
+
+For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
+conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
+<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number as a shift count for
+shifts and rotates is accepted, but that alone does <em>not</em> cause
+a cdata number output.</li>
+
 </ul>
 
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1222,7 +1238,6 @@ value.</li>
 Other missing features:
 </p>
 <ul>
-<li>Bit operations for 64&nbsp;bit types.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>

+ 3 - 0
doc/extensions.html

@@ -113,6 +113,9 @@ bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
 This module is a LuaJIT built-in &mdash; you don't need to download or
 install Lua BitOp. The Lua BitOp site has full documentation for all
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+The FFI adds support for
+<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
+using the same API functions.
 </p>
 <p>
 Please make sure to <tt>require</tt> the module before using any of

+ 5 - 4
src/Makefile.dep

@@ -7,7 +7,8 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
  lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_gc.h lj_cdata.h \
+ lj_cconv.h lj_carith.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
@@ -17,7 +18,7 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_state.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
  lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
  lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
@@ -58,8 +59,8 @@ lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_carith.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
+ lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
  lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \

+ 92 - 6
src/lib_bit.c

@@ -13,25 +13,82 @@
 #include "lj_obj.h"
 #include "lj_err.h"
 #include "lj_str.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
 #include "lj_lib.h"
 
 /* ------------------------------------------------------------------------ */
 
 #define LJLIB_MODULE_bit
 
-LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
+#if LJ_HASFFI
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
 {
+  GCcdata *cd = lj_cdata_new_(L, id, 8);  /* New cdata of C type id; 8 is presumably the payload size in bytes. */
+  *(uint64_t *)cdataptr(cd) = x;  /* Store the 64 bit value as the cdata payload. */
+  setcdataV(L, L->base-1, cd);  /* Write the result into the slot just below L->base. */
+  return FFH_RES(1);
+}
+#endif
+
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;  /* Receives the 64 bit type if argument 1 is a cdata; unused here. */
+  setintV(L->base-1, (int32_t)lj_carith_check64(L, 1, &id));  /* Always truncate to int32_t, even for cdata input. */
+  return FFH_RES(1);
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;  /* Stays 0 unless argument 1 is a cdata. */
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, ~x) : FFH_RETRY;  /* cdata: return boxed 64 bit ~x; otherwise return FFH_RETRY. */
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;  /* Stays 0 unless argument 1 is a cdata. */
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;  /* cdata: return boxed lj_bswap64(x); otherwise return FFH_RETRY. */
+#else
   lj_lib_checknumber(L, 1);
   return FFH_RETRY;
+#endif
 }
-LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
 
 LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
 {
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;  /* id: type of the shifted value, id2: type of the shift count. */
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+  if (id) {  /* First argument is a cdata: do a 64 bit shift/rotate. */
+    x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);  /* Op index = fast function id relative to FF_bit_lshift. */
+    return bit_result64(L, id, x);
+  }
+  if (id2) setintV(L->base+1, sh);  /* Only the shift count is a cdata: replace it in its slot by its int32_t value. */
+  return FFH_RETRY;
+#else
   lj_lib_checknumber(L, 1);
   lj_lib_checkbit(L, 2);
   return FFH_RETRY;
+#endif
 }
 LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
 LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
@@ -40,9 +97,29 @@ LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
 
 LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
 {
+#if LJ_HASFFI
+  CTypeID id = 0;  /* Becomes non-zero if any argument is a cdata. */
+  TValue *o = L->base, *top = L->top;
+  int i = 0;
+  do { lj_carith_check64(L, ++i, &id); } while (++o < top);  /* Check every argument and accumulate the result type in id. */
+  if (id) {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_get(cts, id);
+    int op = curr_func(L)->c.ffid - (int)FF_bit_bor;  /* <0: and, 0: or, >0: xor (see the dispatch below). */
+    uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;  /* Start from the identity element: all ones for and, zero for or/xor. */
+    o = L->base;
+    do {
+      lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);  /* Convert each argument to the common 64 bit type. */
+      if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+    } while (++o < top);
+    return bit_result64(L, id, y);
+  }
+  return FFH_RETRY;
+#else
   int i = 0;
   do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
   return FFH_RETRY;
+#endif
 }
 LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
 LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
@@ -51,12 +128,21 @@ LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
 
 LJLIB_CF(bit_tohex)
 {
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t b = lj_carith_check64(L, 1, &id);
+  int32_t i, dig = id ? 16 : 8;
+  int32_t n = L->base+1>=L->top ? dig : (int32_t)lj_carith_check64(L, 2, &id2);
+  char buf[16];
+#else
   uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
-  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
-  const char *hexdigits = "0123456789abcdef";
+  int32_t i, dig = 8;
+  int32_t n = L->base+1>=L->top ? dig : lj_lib_checkbit(L, 2);
   char buf[8];
+#endif
+  const char *hexdigits = "0123456789abcdef";
   if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
-  if (n > 8) n = 8;
+  if (n > dig) n = dig;
   for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
   lua_pushlstring(L, buf, (size_t)n);
   return 1;

+ 76 - 0
src/lj_carith.c

@@ -11,10 +11,12 @@
 #include "lj_err.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
+#include "lj_ir.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cdata.h"
 #include "lj_carith.h"
+#include "lj_strscan.h"
 
 /* -- C data arithmetic --------------------------------------------------- */
 
@@ -270,6 +272,80 @@ int lj_carith_op(lua_State *L, MMS mm)
   return lj_carith_meta(L, cts, &ca, mm);
 }
 
+/* -- 64 bit bit operations helpers --------------------------------------- */
+
+#if LJ_64
+#define B64DEF(name) \
+  static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
+#else
+/*
+** Not inlined on 32 bit archs, since some of these are quite lengthy.
+** In this configuration the helpers are not static, i.e. they have
+** external linkage.
+*/
+#define B64DEF(name) \
+  uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
+#endif
+
+B64DEF(shl64) { return x << (sh&63); }  /* Shift left; count is masked to 0..63. */
+B64DEF(shr64) { return x >> (sh&63); }  /* Logical (unsigned) shift right. */
+B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }  /* Shift right performed on the signed value. */
+B64DEF(rol64) { return lj_rol(x, (sh&63)); }  /* Rotate left via lj_rol. */
+B64DEF(ror64) { return lj_ror(x, (sh&63)); }  /* Rotate right via lj_ror. */
+
+#undef B64DEF
+
+uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
+{
+  switch (op) {  /* op is an index relative to IR_BSHL; each helper masks sh to 0..63. */
+  case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
+  case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
+  case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
+  case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
+  case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
+  default: lua_assert(0); break;  /* Unknown op: asserts, then x is returned unchanged. */
+  }
+  return x;
+}
+
+/*
+** Equivalent to lj_lib_checkbit(), but handles cdata.
+**
+** Returns argument number narg (1-based) as a 64 bit value.
+** - Missing argument, or neither number, cdata nor convertible string:
+**   raises an argument type error via lj_err_argt().
+** - cdata: *id is set to CTID_UINT64 for an unsigned 8 byte integer type,
+**   else to CTID_INT64 if *id is still 0. The value is converted to the
+**   type given by the resulting *id.
+** - Number or string: *id is left untouched and the 32 bit integer value
+**   is returned.
+*/
+uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
+{
+  TValue *o = L->base + narg-1;
+  if (o >= L->top) {
+  err:
+    lj_err_argt(L, narg, LUA_TNUMBER);  /* NOTE(review): presumably does not return -- confirm. */
+  } else if (LJ_LIKELY(tvisnumber(o))) {
+    /* Handled below. */
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(L);
+    uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
+    CTypeID sid = cdataV(o)->ctypeid;
+    CType *s = ctype_get(cts, sid);
+    uint64_t x;
+    if (ctype_isref(s->info)) {  /* Reference: follow the stored pointer and use the referenced type. */
+      sp = *(void **)sp;
+      sid = ctype_cid(s->info);
+    }
+    s = ctype_raw(cts, sid);
+    if (ctype_isenum(s->info)) s = ctype_child(cts, s);  /* Enum: use its child type. */
+    if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
+      *id = CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    else if (!*id)
+      *id = CTID_INT64;  /* Use int64_t, unless already set. */
+    lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
+		   (uint8_t *)&x, sp, CCF_ARG(narg));
+    return x;
+  } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {  /* Presumably converts a numeric string in place into o -- confirm. */
+    goto err;
+  }
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);  /* With LJ_DUALNUM the stack slot is overwritten by the integer. */
+    return i;  /* int32_t is sign-extended by the conversion to uint64_t. */
+  }
+}
+
+
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 
 #if LJ_32 && LJ_HASJIT

+ 10 - 0
src/lj_carith.h

@@ -12,6 +12,16 @@
 
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 
+#if LJ_32
+LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
+#endif
+LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
+LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
+
 #if LJ_32 && LJ_HASJIT
 LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
 #endif

+ 95 - 0
src/lj_crecord.c

@@ -1626,6 +1626,101 @@ void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
   crec_finalizer(J, J->base[0], &rd->argv[1]);
 }
 
+/* -- 64 bit bit.* library functions -------------------------------------- */
+
+/*
+** Determine bit operation type from argument type.
+** Returns CTID_UINT64 or CTID_INT64 for a cdata argument and 0 for
+** anything else.
+*/
+static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
+{
+  if (tviscdata(tv)) {
+    CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
+    if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);  /* Enum: classify by its child type. */
+    if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
+      return CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    return CTID_INT64;  /* Otherwise use int64_t. */
+  }
+  return 0;  /* Use regular 32 bit ops. */
+}
+
+void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+		       J->base[0], &rd->argv[0]);  /* Convert the first argument to int64_t. */
+  if (!tref_isinteger(tr))
+    tr = emitconv(tr, IRT_INT, tref_type(tr), 0);  /* Then convert down to a 32 bit integer. */
+  J->base[0] = tr;  /* The result replaces the first slot. */
+}
+
+int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = crec_bit64_type(cts, &rd->argv[0]);  /* Non-zero only for a cdata argument. */
+  if (id) {
+    TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);  /* rd->data is the IR opcode; IR type derived from id (assumes the INT64/UINT64 ids are adjacent like IRT_I64/IRT_U64 -- confirm). */
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);  /* Box the result as a new cdata of type id. */
+    return 1;  /* 64 bit operation recorded. */
+  }
+  return 0;  /* Not a cdata argument: nothing recorded. */
+}
+
+int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = 0;
+  MSize i;
+  for (i = 0; J->base[i] != 0; i++) {  /* The argument list ends at the first zero slot. */
+    CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
+    if (id < aid) id = aid;  /* Determine highest type rank of all arguments. */
+  }
+  if (id) {  /* At least one cdata argument: convert all arguments to type id. */
+    CType *ct = ctype_get(cts, id);
+    uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);  /* rd->data is the IR opcode to emit. */
+    TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
+    for (i = 1; J->base[i] != 0; i++) {  /* Fold the remaining arguments from left to right. */
+      TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
+      tr = emitir(ot, tr, tr2);
+    }
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);  /* Box the result as a new cdata of type id. */
+    return 1;  /* 64 bit operation recorded. */
+  }
+  return 0;  /* No cdata argument: nothing recorded. */
+}
+
+int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id;
+  TRef tsh = 0;
+  if (J->base[0] && tref_iscdata(J->base[1])) {  /* A cdata shift count is converted to a 32 bit integer first. */
+    tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+		     J->base[1], &rd->argv[1]);
+    if (!tref_isinteger(tsh))
+      tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
+    J->base[1] = tsh;  /* Stored back into the slot, also when 0 is returned below. */
+  }
+  id = crec_bit64_type(cts, &rd->argv[0]);  /* Only the first argument determines the output type. */
+  if (id) {
+    TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    uint32_t op = rd->data;
+    if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
+    if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+	!tref_isk(tsh))
+      tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));  /* Explicitly mask a non-constant count to 0..63. */
+#ifdef LJ_TARGET_UNIFYROT
+      if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {  /* NOTE: separate statement, not governed by the if above despite the indentation. */
+	op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+	tsh = emitir(IRTI(IR_NEG), tsh, tsh);  /* Rotate in the other direction with a negated count. */
+      }
+#endif
+    tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);  /* Box the result as a new cdata of type id. */
+    return 1;  /* 64 bit operation recorded. */
+  }
+  return 0;  /* First argument is not a cdata: nothing recorded. */
+}
+
 /* -- Miscellaneous library functions ------------------------------------- */
 
 void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)

+ 6 - 0
src/lj_crecord.h

@@ -25,6 +25,12 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
+
+LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
+
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
 #endif
 

+ 46 - 20
src/lj_ffrecord.c

@@ -584,40 +584,66 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
 
 /* -- Bit library fast functions ------------------------------------------ */
 
-/* Record unary bit.tobit, bit.bnot, bit.bswap. */
+/* Record bit.tobit. A cdata argument is handed to recff_bit64_tobit(). */
+static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+#if LJ_HASFFI
+  if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
+#endif
+  J->base[0] = lj_opt_narrow_tobit(J, tr);  /* Non-cdata argument: regular 32 bit narrowing. */
+  UNUSED(rd);  /* rd is otherwise unused when LJ_HASFFI is off. */
+}
+
+/* Record unary bit.bnot, bit.bswap. */
 static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
+#if LJ_HASFFI
+  if (recff_bit64_unary(J, rd))
+    return;
+#endif
+  J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
 }
 
 /* Record N-ary bit.band, bit.bor, bit.bxor. */
 static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  uint32_t op = rd->data;
-  BCReg i;
-  for (i = 1; J->base[i] != 0; i++)
-    tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i]));
-  J->base[0] = tr;
+#if LJ_HASFFI
+  if (recff_bit64_nary(J, rd))
+    return;
+#endif
+  {
+    TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+    uint32_t ot = IRTI(rd->data);
+    BCReg i;
+    for (i = 1; J->base[i] != 0; i++)
+      tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
+    J->base[0] = tr;
+  }
 }
 
 /* Record bit shifts. */
 static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
-  IROp op = (IROp)rd->data;
-  if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
-      !tref_isk(tsh))
-    tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
+#if LJ_HASFFI
+  if (recff_bit64_shift(J, rd))
+    return;
+#endif
+  {
+    TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+    TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
+    IROp op = (IROp)rd->data;
+    if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+	!tref_isk(tsh))
+      tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
 #ifdef LJ_TARGET_UNIFYROT
-  if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
-    op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
-    tsh = emitir(IRTI(IR_NEG), tsh, tsh);
-  }
+    if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+      op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+      tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+    }
 #endif
-  J->base[0] = emitir(IRTI(op), tr, tsh);
+    J->base[0] = emitir(IRTI(op), tr, tsh);
+  }
 }
 
 /* -- String library fast functions --------------------------------------- */

+ 6 - 1
src/lj_ircall.h

@@ -172,7 +172,12 @@ typedef struct CCallInfo {
   _(FFI,	memcpy,			3,         S, PTR, 0) \
   _(FFI,	memset,			3,         S, PTR, 0) \
   _(FFI,	lj_vm_errno,		0,         S, INT, CCI_NOFPRCLOBBER) \
-  _(FFI32,	lj_carith_mul64,	ARG2_64,   N, I64, CCI_NOFPRCLOBBER)
+  _(FFI32,	lj_carith_mul64,	ARG2_64,   N, I64, CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_shl64,	3,         N, U64, CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_shr64,	3,         N, U64, CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_sar64,	3,         N, U64, CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_rol64,	3,         N, U64, CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_ror64,	3,         N, U64, CCI_NOFPRCLOBBER) \
   \
   /* End of list. */
 

+ 14 - 27
src/lj_opt_fold.c

@@ -22,8 +22,8 @@
 #include "lj_trace.h"
 #if LJ_HASFFI
 #include "lj_ctype.h"
-#endif
 #include "lj_carith.h"
+#endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
 
@@ -336,11 +336,9 @@ LJFOLDF(kfold_intcomp0)
 static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
 {
   switch (op) {
-#if LJ_64 || LJ_HASFFI
+#if LJ_HASFFI
   case IR_ADD: k1 += k2; break;
   case IR_SUB: k1 -= k2; break;
-#endif
-#if LJ_HASFFI
   case IR_MUL: k1 *= k2; break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
@@ -392,20 +390,10 @@ LJFOLD(BROL KINT64 KINT)
 LJFOLD(BROR KINT64 KINT)
 LJFOLDF(kfold_int64shift)
 {
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
   uint64_t k = ir_k64(fleft)->u64;
   int32_t sh = (fright->i & 63);
-  switch ((IROp)fins->o) {
-  case IR_BSHL: k <<= sh; break;
-#if LJ_HASFFI
-  case IR_BSHR: k >>= sh; break;
-  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
-  case IR_BROL: k = lj_rol(k, sh); break;
-  case IR_BROR: k = lj_ror(k, sh); break;
-#endif
-  default: lua_assert(0); break;
-  }
-  return INT64FOLD(k);
+  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
 #else
   UNUSED(J); lua_assert(0); return FAILFOLD;
 #endif
@@ -1192,7 +1180,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
   ** But this is mainly intended for simple address arithmetic.
   ** Also it's easier for the backend to optimize the original multiplies.
   */
-  if (k == 1) {  /* i * 1 ==> i */
+  if (k == 0) {  /* i * 0 ==> 0 */
+    return RIGHTFOLD;
+  } else if (k == 1) {  /* i * 1 ==> i */
     return LEFTFOLD;
   } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
     fins->o = IR_BSHL;
@@ -1205,9 +1195,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
 LJFOLD(MUL any KINT)
 LJFOLDF(simplify_intmul_k32)
 {
-  if (fright->i == 0)  /* i * 0 ==> 0 */
-    return INTFOLD(0);
-  else if (fright->i > 0)
+  if (fright->i >= 0)
     return simplify_intmul_k(J, fright->i);
   return NEXTFOLD;
 }
@@ -1215,14 +1203,13 @@ LJFOLDF(simplify_intmul_k32)
 LJFOLD(MUL any KINT64)
 LJFOLDF(simplify_intmul_k64)
 {
-  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
-    return INT64FOLD(0);
-#if LJ_64
-  /* NYI: SPLIT for BSHL and 32 bit backend support. */
-  else if (ir_kint64(fright)->u64 < 0x80000000u)
+#if LJ_HASFFI
+  if (ir_kint64(fright)->u64 < 0x80000000u)
     return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
-#endif
   return NEXTFOLD;
+#else
+  UNUSED(J); lua_assert(0); return FAILFOLD;
+#endif
 }
 
 LJFOLD(MOD any KINT)
@@ -1522,7 +1509,7 @@ LJFOLD(BOR BOR KINT64)
 LJFOLD(BXOR BXOR KINT64)
 LJFOLDF(reassoc_intarith_k64)
 {
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
   IRIns *irk = IR(fleft->op2);
   if (irk->o == IR_KINT64) {
     uint64_t k = kfold_int64arith(ir_k64(irk)->u64,

+ 126 - 1
src/lj_opt_split.c

@@ -140,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
   return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
 }
+#endif
 
 /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
 static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -156,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
   return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
 }
-#endif
 
 /* Emit a CALLN with two split 64 bit arguments. */
 static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -196,6 +196,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
   return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
 }
 
+#if LJ_HASFFI
+static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
+			    IRIns *oir, IRIns *nir, IRIns *ir)
+{
+  IROp op = ir->o;
+  IRRef kref = nir->op2;
+  if (irref_isk(kref)) {  /* Optimize constant shifts. */
+    int32_t k = (IR(kref)->i & 63);  /* Constant count, masked to 0..63. */
+    IRRef lo = nir->op1, hi = hisubst[ir->op1];  /* The two 32 bit halves of the shifted operand. */
+    if (op == IR_BROL || op == IR_BROR) {
+      if (op == IR_BROR) k = (-k & 63);  /* Rotate right by k equals rotate left by 64-k. */
+      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }  /* A rotate by 32 swaps the halves. */
+      if (k == 0) {
+      passthrough:
+	J->cur.nins--;  /* Drop the most recently emitted instruction (presumably nir -- confirm). */
+	ir->prev = lo;
+	return hi;
+      } else {
+	TRef k1, k2;
+	IRRef t1, t2, t3, t4;
+	J->cur.nins--;
+	k1 = lj_ir_kint(J, k);
+	k2 = lj_ir_kint(J, (-k & 31));  /* Complementary count: 32-k for 0 < k < 32. */
+	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
+	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
+	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
+	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
+	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);  /* New lo: (lo << k) | (hi >> (32-k)). */
+	return split_emit(J, IRTI(IR_BOR), t2, t3);  /* New hi: (hi << k) | (lo >> (32-k)). */
+      }
+    } else if (k == 0) {
+      goto passthrough;
+    } else if (k < 32) {
+      if (op == IR_BSHL) {
+	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
+	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
+	return split_emit(J, IRTI(IR_BOR), t1, t2);  /* New hi: (hi << k) | (lo >> (32-k)). */
+      } else {
+	IRRef t1 = ir->prev, t2;
+	lua_assert(op == IR_BSHR || op == IR_BSAR);
+	nir->o = IR_BSHR;  /* The lo part always uses a logical shift. */
+	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
+	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
+	return split_emit(J, IRTI(op), hi, kref);  /* The hi part keeps the original op (BSHR or BSAR). */
+      }
+    } else {
+      if (op == IR_BSHL) {
+	if (k == 32)
+	  J->cur.nins--;
+	else
+	  lo = ir->prev;
+	ir->prev = lj_ir_kint(J, 0);  /* Shift left by >= 32: the lo result is the constant 0. */
+	return lo;
+      } else {
+	lua_assert(op == IR_BSHR || op == IR_BSAR);
+	if (k == 32) {
+	  J->cur.nins--;
+	  ir->prev = hi;
+	} else {
+	  nir->op1 = hi;
+	}
+	if (op == IR_BSHR)
+	  return lj_ir_kint(J, 0);  /* Logical shift right by >= 32: the hi result is the constant 0. */
+	else
+	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));  /* Arithmetic: hi result is hi >> 31 (arithmetic). */
+      }
+    }
+  }
+  return split_call_li(J, hisubst, oir, ir,
+		       op - IR_BSHL + IRCALL_lj_carith_shl64);  /* Non-constant count: call the lj_carith_*64 helper selected by the op offset from IR_BSHL. */
+}
+
+static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
+			 IRIns *nir, IRIns *ir)
+{
+  IROp op = ir->o;
+  IRRef hi, kref = nir->op2;
+  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
+    int32_t k = IR(kref)->i;
+    if (k == 0 || k == -1) {
+      if (op == IR_BAND) k = ~k;  /* Invert for BAND, so k == 0 below always means "result is the other operand". */
+      if (k == 0) {
+	J->cur.nins--;  /* Drop the most recently emitted instruction (presumably nir -- confirm). */
+	ir->prev = nir->op1;
+      } else if (op == IR_BXOR) {
+	nir->o = IR_BNOT;  /* x ^ -1 ==> ~x */
+	nir->op2 = 0;
+      } else {
+	J->cur.nins--;
+	ir->prev = kref;  /* x & 0 and x | -1 yield the constant itself. */
+      }
+    }
+  }
+  hi = hisubst[ir->op1];
+  kref = hisubst[ir->op2];
+  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
+    int32_t k = IR(kref)->i;
+    if (k == 0 || k == -1) {
+      if (op == IR_BAND) k = ~k;  /* Same normalization as for the lo part. */
+      if (k == 0) {
+	return hi;
+      } else if (op == IR_BXOR) {
+	return split_emit(J, IRTI(IR_BNOT), hi, 0);
+      } else {
+	return kref;
+      }
+    }
+  }
+  return split_emit(J, IRTI(op), hi, kref);  /* General case: emit the same op for the hi part. */
+}
+#endif
+
 /* Transform the old IR to the new IR. */
 static void split_ir(jit_State *J)
 {
@@ -417,6 +529,19 @@ static void split_ir(jit_State *J)
 			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
 					      IRCALL_lj_carith_powu64);
 	break;
+      case IR_BNOT:
+	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
+	break;
+      case IR_BSWAP:
+	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
+	hi = nref;
+	break;
+      case IR_BAND: case IR_BOR: case IR_BXOR:
+	hi = split_bitop(J, hisubst, nir, ir);
+	break;
+      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+	hi = split_bitshift(J, hisubst, oir, nir, ir);
+	break;
       case IR_FLOAD:
 	lua_assert(ir->op2 == IRFL_CDATA_INT64);
 	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);