Browse Source

FFI: Compile calls to stdcall, fastcall and vararg functions.

Mike Pall 14 năm trước
mục cha
commit
2dc574d06b
14 tập tin đã thay đổi với 262 bổ sung và 78 xóa
  1. 0 1
      doc/ext_ffi_semantics.html
  2. 21 3
      lib/dump.lua
  3. 4 4
      src/Makefile.dep
  4. 10 1
      src/lj_asm.c
  5. 7 3
      src/lj_asm_arm.h
  6. 9 3
      src/lj_asm_ppc.h
  7. 104 36
      src/lj_asm_x86.h
  8. 2 2
      src/lj_ccall.c
  9. 3 0
      src/lj_ccall.h
  10. 83 20
      src/lj_crecord.c
  11. 1 1
      src/lj_ctype.h
  12. 12 4
      src/lj_ircall.h
  13. 2 0
      src/lj_target_ppc.h
  14. 4 0
      src/lj_target_x86.h

+ 0 - 1
doc/ext_ffi_semantics.html

@@ -985,7 +985,6 @@ alignment &gt; 8&nbsp;bytes.</li>
 <li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
 <li>Pointer differences for element sizes that are not a power of
 two.</li>
-<li>Calls to non-cdecl or vararg C&nbsp;functions.</li>
 <li>Calls to C&nbsp;functions with aggregates passed or returned by
 value.</li>
 <li>Calls to ctype metamethods which are not plain functions.</li>

+ 21 - 3
lib/dump.lua

@@ -378,6 +378,24 @@ local function ridsp_name(ridsp)
   return ""
 end
 
+-- Dump CALL* function ref and return optional ctype.
+local function dumpcallfunc(tr, ins)
+  local ctype
+  if ins > 0 then
+    local m, ot, op1, op2 = traceir(tr, ins)
+    if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
+      ins = op1
+      ctype = formatk(tr, op2)
+    end
+  end
+  if ins < 0 then
+    out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
+  else
+    out:write(format("%04d (", ins))
+  end
+  return ctype
+end
+
 -- Recursively gather CALL* args and dump them.
 local function dumpcallargs(tr, ins)
   if ins < 0 then
@@ -447,15 +465,15 @@ local function dump_ir(tr, dumpsnap, dumpreg)
 		       irtype[t], op))
       local m1, m2 = band(m, 3), band(m, 3*4)
       if sub(op, 1, 4) == "CALL" then
+	local ctype
 	if m2 == 1*4 then -- op2 == IRMlit
 	  out:write(format("%-10s  (", vmdef.ircall[op2]))
-	elseif op2 < 0 then
-	  out:write(format("[0x%x](", tonumber((tracek(tr, op2)))))
 	else
-	  out:write(format("%04d (", op2))
+	  ctype = dumpcallfunc(tr, op2)
 	end
 	if op1 ~= -1 then dumpcallargs(tr, op1) end
 	out:write(")")
+	if ctype then out:write(" ctype ", ctype) end
       elseif op == "CNEW  " and op2 == -1 then
 	out:write(formatk(tr, op1))
       elseif m1 ~= 3 then -- op1 != IRMnone

+ 4 - 4
src/Makefile.dep

@@ -1,6 +1,6 @@
 buildvm.o: buildvm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
  lj_obj.h lj_gc.h lj_bc.h lj_ir.h lj_ircall.h lj_jit.h lj_frame.h \
- lj_dispatch.h lj_ccall.h luajit.h \
+ lj_dispatch.h lj_ccall.h lj_ctype.h luajit.h \
  lj_traceerr.h
 buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
  lj_bc.h
@@ -86,9 +86,9 @@ lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_bc.h lj_vm.h lj_char.h
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cparse.h lj_cconv.h lj_clib.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
- lj_ffrecord.h lj_crecord.h
+ lj_gc.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_record.h lj_ffrecord.h lj_crecord.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \

+ 10 - 1
src/lj_asm.c

@@ -888,7 +888,16 @@ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
     nargs++;
     while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
   }
-  /* NYI: fastcall etc. */
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
   return (nargs | (ir->t.irt << CCI_OTSHIFT));
 }
 

+ 7 - 3
src/lj_asm_arm.h

@@ -331,13 +331,17 @@ static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX];
   CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
   ci.flags = asm_callx_flags(as, ir);
   asm_collectargs(as, ir, &ci, args);
   asm_setupresult(as, ir, &ci);
-  if (irref_isk(ir->op2)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(IR(ir->op2)->i);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
-    Reg freg = ra_alloc1(as, ir->op2, RSET_RANGE(RID_R4, RID_R12+1));
+    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1));
     emit_m(as, ARMI_BLXr, freg);
     ci.func = (ASMFunction)(void *)0;
   }

+ 9 - 3
src/lj_asm_ppc.h

@@ -284,6 +284,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 	ofs += 4;
     }
   }
+  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
+    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
 }
 
 /* Setup result reg/sp for call. Evict scratch regs. */
@@ -336,14 +338,18 @@ static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX];
   CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
   ci.flags = asm_callx_flags(as, ir);
   asm_collectargs(as, ir, &ci, args);
   asm_setupresult(as, ir, &ci);
-  if (irref_isk(ir->op2)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(IR(ir->op2)->i);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
-    Reg freg = ra_alloc1(as, ir->op2, allow);
+    Reg freg = ra_alloc1(as, func, allow);
     *--as->mcp = PPCI_BCTRL;
     *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
     ci.func = (ASMFunction)(void *)0;

+ 104 - 36
src/lj_asm_x86.h

@@ -369,18 +369,76 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 
 /* -- Calls --------------------------------------------------------------- */
 
+/* Count the required number of stack slots for a call. */
+static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t i, nargs = CCI_NARGS(ci);
+  int nslots = 0;
+#if LJ_64
+  if (LJ_ABI_WIN) {
+    nslots = (int)(nargs*2);  /* Only matters for more than four args. */
+  } else {
+    int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+    for (i = 0; i < nargs; i++)
+      if (args[i] && irt_isfp(IR(args[i])->t)) {
+	if (nfpr > 0) nfpr--; else nslots += 2;
+      } else {
+	if (ngpr > 0) ngpr--; else nslots += 2;
+      }
+  }
+#else
+  int ngpr = 0;
+  if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
+    ngpr = 2;
+  else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
+    ngpr = 1;
+  for (i = 0; i < nargs; i++)
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
+    } else {
+      if (ngpr > 0) ngpr--; else nslots++;
+    }
+#endif
+  return nslots;
+}
+
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   uint32_t n, nargs = CCI_NARGS(ci);
   int32_t ofs = STACKARG_OFS;
-  uint32_t gprs = REGARG_GPRS;
 #if LJ_64
+  uint32_t gprs = REGARG_GPRS;
   Reg fpr = REGARG_FIRSTFPR;
+#if !LJ_ABI_WIN
+  MCode *patchnfpr = NULL;
+#endif
+#else
+  uint32_t gprs = 0;
+  if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) {
+    if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
+      gprs = (REGARG_GPRS & 31);
+    else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
+      gprs = REGARG_GPRS;
+  }
 #endif
-  lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
   if ((void *)ci->func)
     emit_call(as, ci->func);
+#if LJ_64
+  if ((ci->flags & CCI_VARARG)) {  /* Special handling for vararg calls. */
+#if LJ_ABI_WIN
+    for (n = 0; n < 4 && n < nargs; n++) {
+      IRIns *ir = IR(args[n]);
+      if (irt_isfp(ir->t))  /* Duplicate FPRs in GPRs. */
+	emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n),
+		((gprs >> (n*5)) & 31));  /* Either MOVD or MOVQ. */
+    }
+#else
+    patchnfpr = --as->mcp;  /* Indicate number of used FPRs in register al. */
+    *--as->mcp = XI_MOVrib | RID_EAX;
+#endif
+  }
+#endif
   for (n = 0; n < nargs; n++) {  /* Setup args. */
     IRRef ref = args[n];
     IRIns *ir = IR(ref);
@@ -392,15 +450,16 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 #elif LJ_64
     /* POSIX/x64 argument registers are used in order of appearance. */
     if (irt_isfp(ir->t)) {
-      r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
+      r = fpr <= REGARG_LASTFPR ? fpr++ : 0;
     } else {
       r = gprs & 31; gprs >>= 5;
     }
 #else
-    if (irt_isfp(ir->t) || !(ci->flags & CCI_FASTCALL)) {
+    if (ref && irt_isfp(ir->t)) {
       r = 0;
     } else {
       r = gprs & 31; gprs >>= 5;
+      if (!ref) continue;
     }
 #endif
     if (r) {  /* Argument is in a register. */
@@ -442,6 +501,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       ofs += sizeof(intptr_t);
     }
   }
+#if LJ_64 && !LJ_ABI_WIN
+  if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
+#endif
 }
 
 /* Setup result reg/sp for call. Evict scratch regs. */
@@ -503,23 +565,50 @@ static void asm_call(ASMState *as, IRIns *ir)
   asm_gencall(as, ci, args);
 }
 
+/* Return a constant function pointer or NULL for indirect calls. */
+static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
+{
+#if LJ_32
+  UNUSED(as);
+  if (irref_isk(func))
+    return (void *)irf->i;
+#else
+  if (irref_isk(func)) {
+    MCode *p;
+    if (irf->o == IR_KINT64)
+      p = (MCode *)(void *)ir_k64(irf)->u64;
+    else
+      p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
+    if (p - as->mcp == (int32_t)(p - as->mcp))
+      return p;  /* Call target is still in +-2GB range. */
+    /* Avoid the indirect case of emit_call(). Try to hoist func addr. */
+  }
+#endif
+  return NULL;
+}
+
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX];
   CCallInfo ci;
+  IRRef func;
   IRIns *irf;
   ci.flags = asm_callx_flags(as, ir);
   asm_collectargs(as, ir, &ci, args);
   asm_setupresult(as, ir, &ci);
-  irf = IR(ir->op2);
-  if (LJ_32 && irref_isk(ir->op2)) {  /* Call to constant address on x86. */
-    ci.func = (ASMFunction)(void *)(uintptr_t)(uint32_t)irf->i;
-  } else {
-    /* Prefer a non-argument register or RID_RET for indirect calls. */
-    RegSet allow = (RSET_GPR & ~RSET_SCRATCH)|RID2RSET(RID_RET);
-    Reg r = ra_alloc1(as, ir->op2, allow);
+#if LJ_32
+  /* Have to readjust stack after non-cdecl calls due to callee cleanup. */
+  if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL)
+    emit_spsub(as, 4 * asm_count_call_slots(as, &ci, args));
+#endif
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  ci.func = (ASMFunction)asm_callx_func(as, irf, func);
+  if (!(void *)ci.func) {
+    /* Use a (hoistable) non-scratch register for indirect calls. */
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+    Reg r = ra_alloc1(as, func, allow);
     emit_rr(as, XO_GROUP5, XOg_CALL, r);
-    ci.func = (ASMFunction)(void *)0;
   }
   asm_gencall(as, &ci, args);
 }
@@ -2608,35 +2697,14 @@ static void asm_ir(ASMState *as, IRIns *ir)
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX];
-  uint32_t nargs = (int)CCI_NARGS(ci);
-  int nslots = 0;
+  int nslots;
   asm_collectargs(as, ir, ci, args);
-#if LJ_64
-  if (LJ_ABI_WIN) {
-    nslots = (int)(nargs*2);  /* Only matters for more than four args. */
-  } else {
-    uint32_t i;
-    int ngpr = 6, nfpr = 8;
-    for (i = 0; i < nargs; i++)
-      if (args[i] && irt_isfp(IR(args[i])->t)) {
-	if (nfpr > 0) nfpr--; else nslots += 2;
-      } else {
-	if (ngpr > 0) ngpr--; else nslots += 2;
-      }
-  }
+  nslots = asm_count_call_slots(as, ci, args);
   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
     as->evenspill = nslots;
+#if LJ_64
   return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
 #else
-  if ((ci->flags & CCI_FASTCALL)) {
-    lua_assert(nargs <= 2);
-  } else {
-    uint32_t i;
-    for (i = 0; i < nargs; i++)
-      nslots += (args[i] && irt_isnum(IR(args[i])->t)) ? 2 : 1;
-    if (nslots > as->evenspill)  /* Leave room for args. */
-      as->evenspill = nslots;
-  }
   return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
 #endif
 }

+ 2 - 2
src/lj_ccall.c

@@ -402,7 +402,7 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz)
 /* -- Common C call handling ---------------------------------------------- */
 
 /* Infer the destination CTypeID for a vararg argument. */
-static CTypeID ccall_ctid_vararg(CTState *cts, cTValue *o)
+CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o)
 {
   if (tvisnumber(o)) {
     return CTID_DOUBLE;
@@ -506,7 +506,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
     } else {
       if (!(ct->info & CTF_VARARG))
 	lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too many arguments. */
-      did = ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
+      did = lj_ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
       isva = 1;
     }
     d = ctype_raw(cts, did);

+ 3 - 0
src/lj_ccall.h

@@ -7,6 +7,7 @@
 #define _LJ_CCALL_H
 
 #include "lj_obj.h"
+#include "lj_ctype.h"
 
 #if LJ_HASFFI
 
@@ -129,6 +130,8 @@ typedef struct CCallState {
 
 /* Really belongs to lj_vm.h. */
 LJ_ASMF void LJ_FASTCALL lj_vm_ffi_call(CCallState *cc);
+
+LJ_FUNC CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o);
 LJ_FUNC int lj_ccall_func(lua_State *L, GCcdata *cd);
 
 #endif

+ 83 - 20
src/lj_crecord.c

@@ -18,6 +18,7 @@
 #include "lj_cparse.h"
 #include "lj_cconv.h"
 #include "lj_clib.h"
+#include "lj_ccall.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_ircall.h"
@@ -364,7 +365,7 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
 
 /* -- Convert TValue to C type (store) ------------------------------------ */
 
-static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
+static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
 {
   CTState *cts = ctype_ctsG(J2G(J));
   CTypeID sid = CTID_P_VOID;
@@ -747,29 +748,88 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
 			   CTState *cts, CType *ct)
 {
   TRef args[CCI_NARGS_MAX];
+  CTypeID fid;
   MSize i, n;
-  TRef tr;
+  TRef tr, *base;
+  cTValue *o;
+#if LJ_TARGET_X86
+#if LJ_ABI_WIN
+  TRef *arg0 = NULL, *arg1 = NULL;
+#endif
+  int ngpr = 0;
+  if (ctype_cconv(ct->info) == CTCC_THISCALL)
+    ngpr = 1;
+  else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
+    ngpr = 2;
+#endif
+
+  /* Skip initial attributes. */
+  fid = ct->sib;
+  while (fid) {
+    CType *ctf = ctype_get(cts, fid);
+    if (!ctype_isattrib(ctf->info)) break;
+    fid = ctf->sib;
+  }
   args[0] = TREF_NIL;
-  for (n = 0; J->base[n+1]; n++) {
+  for (n = 0, base = J->base+1, o = rd->argv+1; *base; n++, base++, o++) {
+    CTypeID did;
     CType *d;
-    do {
-      if (!ct->sib || n >= CCI_NARGS_MAX)
-	lj_trace_err(J, LJ_TRERR_NYICALL);
-      ct = ctype_get(cts, ct->sib);
-    } while (ctype_isattrib(ct->info));
-    if (!ctype_isfield(ct->info))
+
+    if (n >= CCI_NARGS_MAX)
       lj_trace_err(J, LJ_TRERR_NYICALL);
-    d = ctype_rawchild(cts, ct);
+
+    if (fid) {  /* Get argument type from field. */
+      CType *ctf = ctype_get(cts, fid);
+      fid = ctf->sib;
+      lua_assert(ctype_isfield(ctf->info));
+      did = ctype_cid(ctf->info);
+    } else {
+      if (!(ct->info & CTF_VARARG))
+        lj_trace_err(J, LJ_TRERR_NYICALL);  /* Too many arguments. */
+      did = lj_ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
+    }
+    d = ctype_raw(cts, did);
     if (!(ctype_isnum(d->info) || ctype_isptr(d->info) ||
 	  ctype_isenum(d->info)))
       lj_trace_err(J, LJ_TRERR_NYICALL);
-    tr = crec_ct_tv(J, d, 0, J->base[n+1], &rd->argv[n+1]);
-    if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
-      if ((d->info & CTF_UNSIGNED))
-	tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0);
-      else
-	tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16, IRCONV_SEXT);
+    tr = crec_ct_tv(J, d, 0, *base, o);
+    if (ctype_isinteger_or_bool(d->info)) {
+      if (d->size < 4) {
+	if ((d->info & CTF_UNSIGNED))
+	  tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0);
+	else
+	  tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
+      }
     }
+#if LJ_TARGET_X86
+    /* 64 bit args must not end up in registers for fastcall/thiscall. */
+#if LJ_ABI_WIN
+    if (!ctype_isfp(d->info)) {
+      /* Sigh, the Windows/x86 ABI allows reordering across 64 bit args. */
+      if (tref_typerange(tr, IRT_I64, IRT_U64)) {
+	if (ngpr) {
+	  arg0 = &args[n]; args[n++] = TREF_NIL; ngpr--;
+	  if (ngpr) {
+	    arg1 = &args[n]; args[n++] = TREF_NIL; ngpr--;
+	  }
+	}
+      } else {
+	if (arg0) { *arg0 = tr; arg0 = NULL; n--; continue; }
+	if (arg1) { *arg1 = tr; arg1 = NULL; n--; continue; }
+	if (ngpr) ngpr--;
+      }
+    }
+#else
+    if (!ctype_isfp(d->info) && ngpr) {
+      if (tref_typerange(tr, IRT_I64, IRT_U64)) {
+	/* No reordering for other x86 ABIs. Simply add alignment args. */
+	do { args[n++] = TREF_NIL; } while (--ngpr);
+      } else {
+	ngpr--;
+      }
+    }
+#endif
+#endif
     args[n] = tr;
   }
   tr = args[0];
@@ -801,12 +861,15 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
     }
     if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) ||
 	  ctype_isvoid(ctr->info)) ||
-	ctype_isbool(ctr->info) || (ct->info & CTF_VARARG) ||
+	ctype_isbool(ctr->info) || t == IRT_CDATA)
+      lj_trace_err(J, LJ_TRERR_NYICALL);
+    if ((ct->info & CTF_VARARG)
 #if LJ_TARGET_X86
-	ctype_cconv(ct->info) != CTCC_CDECL ||
+	|| ctype_cconv(ct->info) != CTCC_CDECL
 #endif
-	t == IRT_CDATA)
-      lj_trace_err(J, LJ_TRERR_NYICALL);
+	)
+      func = emitir(IRT(IR_CARG, IRT_NIL), func,
+		    lj_ir_kint(J, ctype_typeid(cts, ct)));
     tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func);
     if (t == IRT_FLOAT || t == IRT_U32) {
       tr = emitconv(tr, IRT_NUM, t, 0);

+ 1 - 1
src/lj_ctype.h

@@ -117,7 +117,7 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT);
   info = (info & ~(CTMASK_##field<<CTSHIFT_##field)) | \
 	  (((CTSize)(val) & CTMASK_##field) << CTSHIFT_##field)
 
-/* Calling conventions. */
+/* Calling conventions. ORDER CC */
 enum { CTCC_CDECL, CTCC_THISCALL, CTCC_FASTCALL, CTCC_STDCALL };
 
 /* Attribute numbers. */

+ 12 - 4
src/lj_ircall.h

@@ -27,15 +27,23 @@ typedef struct CCallInfo {
 #define CCI_CALL_N		(IR_CALLN << CCI_OPSHIFT)
 #define CCI_CALL_L		(IR_CALLL << CCI_OPSHIFT)
 #define CCI_CALL_S		(IR_CALLS << CCI_OPSHIFT)
-#define CCI_CALL_FN		(CCI_CALL_N|CCI_FASTCALL)
-#define CCI_CALL_FL		(CCI_CALL_L|CCI_FASTCALL)
-#define CCI_CALL_FS		(CCI_CALL_S|CCI_FASTCALL)
+#define CCI_CALL_FN		(CCI_CALL_N|CCI_CC_FASTCALL)
+#define CCI_CALL_FL		(CCI_CALL_L|CCI_CC_FASTCALL)
+#define CCI_CALL_FS		(CCI_CALL_S|CCI_CC_FASTCALL)
 
 /* C call info flags. */
 #define CCI_L			0x0100	/* Implicit L arg. */
 #define CCI_CASTU64		0x0200	/* Cast u64 result to number. */
 #define CCI_NOFPRCLOBBER	0x0400	/* Does not clobber any FPRs. */
-#define CCI_FASTCALL		0x0800	/* Fastcall convention. */
+#define CCI_VARARG		0x0800	/* Vararg function. */
+
+#define CCI_CC_MASK		0x3000	/* Calling convention mask. */
+#define CCI_CC_SHIFT		12
+/* ORDER CC */
+#define CCI_CC_CDECL		0x0000	/* Default cdecl calling convention. */
+#define CCI_CC_THISCALL		0x1000	/* Thiscall calling convention. */
+#define CCI_CC_FASTCALL		0x2000	/* Fastcall calling convention. */
+#define CCI_CC_STDCALL		0x3000	/* Stdcall calling convention. */
 
 /* Helpers for conditional function definitions. */
 #define IRCALLCOND_ANY(x)		x

+ 2 - 0
src/lj_target_ppc.h

@@ -207,7 +207,9 @@ typedef enum PPCIns {
   PPCI_BCTRL = 0x4e800421,
 
   PPCI_CRANDC = 0x4c000102,
+  PPCI_CRXOR = 0x4c000182,
   PPCI_CRAND = 0x4c000202,
+  PPCI_CREQV = 0x4c000242,
   PPCI_CRORC = 0x4c000342,
   PPCI_CROR = 0x4c000382,
 

+ 4 - 0
src/lj_target_x86.h

@@ -85,6 +85,7 @@ enum {
 #define REGARG_GPRS \
   (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
 #define REGARG_NUMGPR	4
+#define REGARG_NUMFPR	4
 #define REGARG_FIRSTFPR	RID_XMM0
 #define REGARG_LASTFPR	RID_XMM3
 #define STACKARG_OFS	(4*8)
@@ -96,6 +97,7 @@ enum {
   (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
    <<5))<<5))<<5))<<5))<<5))
 #define REGARG_NUMGPR	6
+#define REGARG_NUMFPR	8
 #define REGARG_FIRSTFPR	RID_XMM0
 #define REGARG_LASTFPR	RID_XMM7
 #define STACKARG_OFS	0
@@ -105,6 +107,7 @@ enum {
 #define RSET_SCRATCH	(RSET_ACD|RSET_FPR)
 #define REGARG_GPRS	(RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
 #define REGARG_NUMGPR	2  /* Fastcall only. */
+#define REGARG_NUMFPR	0
 #define STACKARG_OFS	0
 #endif
 
@@ -192,6 +195,7 @@ typedef enum {
   XI_JCCs =	0x70, /* Really 7x. */
   XI_JCCn =	0x80, /* Really 0f8x. */
   XI_LEA =	0x8d,
+  XI_MOVrib =	0xb0, /* Really b0+r. */
   XI_MOVri =	0xb8, /* Really b8+r. */
   XI_ARITHib =	0x80,
   XI_ARITHi =	0x81,