
FFI: Record simple C function calls.

Only handles cdecl and fixarg C functions.
Doesn't handle pass-by-value aggregates.
Doesn't handle 64 bit args/returns on 32 bit CPUs.
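
To make the scope concrete: a fixarg cdecl function taking and returning scalars is exactly what this covers. A minimal sketch, assuming the usual FFI API and a standard C library symbol:

  local ffi = require("ffi")
  ffi.cdef[[
  double sin(double x);           /* fixarg, cdecl: recordable */
  int printf(const char *s, ...); /* vararg: aborts with NYICALL */
  ]]
  local s = 0
  for i = 1, 100 do
    s = s + ffi.C.sin(i)  -- this C call can now be compiled into the trace
  end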
Mike Pall committed 14 years ago
commit 26fadcd25b

7 changed files with 177 additions and 35 deletions:
1. lib/dump.lua       (+8 -3)
2. src/lj_asm.c       (+92 -25)
3. src/lj_crecord.c   (+68 -3)
4. src/lj_ir.h        (+2 -2)
5. src/lj_opt_fold.c  (+1 -0)
6. src/lj_opt_mem.c   (+5 -2)
7. src/lj_traceerr.h  (+1 -0)

lib/dump.lua  (+8 -3)

@@ -429,9 +429,15 @@ local function dump_ir(tr, dumpsnap, dumpreg)
 		       band(ot, 128) == 0 and " " or ">",
 		       band(ot, 64) == 0 and " " or "+",
 		       irtype[t], op))
-      local m1 = band(m, 3)
+      local m1, m2 = band(m, 3), band(m, 3*4)
       if sub(op, 1, 4) == "CALL" then
-	out:write(format("%-10s  (", vmdef.ircall[op2]))
+	if m2 == 1*4 then -- op2 == IRMlit
+	  out:write(format("%-10s  (", vmdef.ircall[op2]))
+	elseif op2 < 0 then
+	  out:write(format("[0x%x](", tonumber((tracek(tr, op2)))))
+	else
+	  out:write(format("%04d (", op2))
+	end
 	if op1 ~= -1 then dumpcallargs(tr, op1) end
 	out:write(")")
       elseif op == "CNEW  " and op2 == -1 then
@@ -442,7 +448,6 @@ local function dump_ir(tr, dumpsnap, dumpreg)
 	else
 	  out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
 	end
-	local m2 = band(m, 3*4)
 	if m2 ~= 3*4 then -- op2 != IRMnone
 	  if m2 == 1*4 then -- op2 == IRMlit
 	    local litn = litname[op]
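
The new branch distinguishes how the call target prints: CALLN/CALLL/CALLS keep a literal op2 indexed into vmdef.ircall, while CALLXS carries a ref, shown either as a constant address or an IR reference. A hedged way to see the resulting IR dumps, using the jit.dump module that ships with LuaJIT:

  -- "i" dumps the trace IR, "m" the generated machine code.
  local dump = require("jit.dump")
  dump.on("im")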

src/lj_asm.c  (+92 -25)

@@ -1459,7 +1459,6 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  RegSet allow = RSET_ALL;
   uint32_t n, nargs = CCI_NARGS(ci);
   int32_t ofs = STACKARG_OFS;
   uint32_t gprs = REGARG_GPRS;
@@ -1467,24 +1466,25 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
   Reg fpr = REGARG_FIRSTFPR;
 #endif
   lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
-  emit_call(as, ci->func);
+  if ((void *)ci->func)
+    emit_call(as, ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
     IRRef ref = args[n];
     IRIns *ir = IR(ref);
     Reg r;
 #if LJ_64 && LJ_ABI_WIN
     /* Windows/x64 argument registers are strictly positional. */
-    r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
+    r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
     fpr++; gprs >>= 5;
 #elif LJ_64
     /* POSIX/x64 argument registers are used in order of appearance. */
-    if (irt_isnum(ir->t)) {
+    if (irt_isfp(ir->t)) {
       r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
     } else {
       r = gprs & 31; gprs >>= 5;
     }
 #else
-    if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) {
+    if (irt_isfp(ir->t) || !(ci->flags & CCI_FASTCALL)) {
       r = 0;
     } else {
       r = gprs & 31; gprs >>= 5;
@@ -1514,22 +1514,16 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 	emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
 	emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
       } else {
-	if ((allow & RSET_FPR) == RSET_EMPTY)
-	  lj_trace_err(as->J, LJ_TRERR_NYICOAL);
-	r = ra_alloc1(as, ref, allow & RSET_FPR);
-	allow &= ~RID2RSET(r);
+	r = ra_alloc1(as, ref, RSET_FPR);
 	emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto,
 		  r, RID_ESP, ofs);
       }
-      ofs += 8;
+      ofs += (LJ_32 && irt_isfloat(ir->t)) ? 4 : 8;
     } else {  /* Non-FP argument is on stack. */
       if (LJ_32 && ref < ASMREF_TMP1) {
 	emit_movmroi(as, RID_ESP, ofs, ir->i);
       } else {
-	if ((allow & RSET_GPR) == RSET_EMPTY)
-	  lj_trace_err(as->J, LJ_TRERR_NYICOAL);
-	r = ra_alloc1(as, ref, allow & RSET_GPR);
-	allow &= ~RID2RSET(r);
+	r = ra_alloc1(as, ref, RSET_GPR);
 	emit_movtomro(as, REX_64IR(ir, r), RID_ESP, ofs);
       }
       ofs += sizeof(intptr_t);
@@ -1575,7 +1569,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 	emit_movtomro(as, RID_RET, RID_ESP, ofs);
 	emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
       } else {
-	emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+	emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+		  irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
       }
 #endif
     } else {
@@ -1585,7 +1580,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-/* Collect arguments from CALL* and ARG instructions. */
+/* Collect arguments from CALL* and CARG instructions. */
 static void asm_collectargs(ASMState *as, IRIns *ir,
 			    const CCallInfo *ci, IRRef *args)
 {
@@ -1610,6 +1605,40 @@ static void asm_call(ASMState *as, IRIns *ir)
   asm_gencall(as, ci, args);
 }
 
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+  /* NYI: fastcall etc. */
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  CCallInfo ci;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  irf = IR(ir->op2);
+  if (LJ_32 && irref_isk(ir->op2)) {  /* Call to constant address on x86. */
+    ci.func = (ASMFunction)(void *)(uintptr_t)(uint32_t)irf->i;
+  } else {
+    /* Prefer a non-argument register or RID_RET for indirect calls. */
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH)|RID2RSET(RID_RET);
+    Reg r = ra_alloc1(as, ir->op2, allow);
+    emit_rr(as, XO_GROUP5, XOg_CALL, r);
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
 /* -- Returns ------------------------------------------------------------- */
 
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -4086,6 +4115,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
 
   /* Calls. */
   case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
   case IR_CARG: break;
 
   default:
@@ -4113,6 +4143,43 @@ static void asm_trace(ASMState *as)
 
 /* -- Trace setup --------------------------------------------------------- */
 
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX];
+  uint32_t nargs = (int)CCI_NARGS(ci);
+  int nslots = 0;
+  asm_collectargs(as, ir, ci, args);
+#if LJ_64
+  if (LJ_ABI_WIN) {
+    nslots = (int)(nargs*2);  /* Only matters for more than four args. */
+  } else {
+    uint32_t i;
+    int ngpr = 6, nfpr = 8;
+    for (i = 0; i < nargs; i++)
+      if (irt_isfp(IR(args[i])->t)) {
+	if (nfpr > 0) nfpr--; else nslots += 2;
+      } else {
+	if (ngpr > 0) ngpr--; else nslots += 2;
+      }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+#else
+  if ((ci->flags & CCI_FASTCALL)) {
+    lua_assert(nargs <= 2);
+  } else {
+    uint32_t i;
+    for (i = 0; i < nargs; i++)
+      nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
+    if (nslots > as->evenspill)  /* Leave room for args. */
+      as->evenspill = nslots;
+  }
+  return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
+#endif
+}
+
 /* Clear reg/sp for all instructions and add register hints. */
 static void asm_setup_regsp(ASMState *as, GCtrace *T)
 {
@@ -4161,17 +4228,17 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
 	}
       }
       break;
+    case IR_CALLXS: {
+      CCallInfo ci;
+      ci.flags = asm_callx_flags(as, ir);
+      ir->prev = asm_setup_call_slots(as, ir, &ci);
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+      continue;
+      }
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-#if LJ_64
-      lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6));
-      ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
-#else
-      lua_assert(!(ci->flags & CCI_FASTCALL) || CCI_NARGS(ci) <= 2);
-      if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
-	as->evenspill = (int32_t)CCI_NARGS(ci);
-      ir->prev = REGSP_HINT(RID_RET);
-#endif
+      ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
 	as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
 		      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
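
Most of the new assembler code is mechanical; the recurring idea is that a CALLXS carries its arguments as a chain of IR_CARG instructions hanging off op1, which asm_callx_flags walks to recover the argument count. A sketch of that walk transcribed into Lua, with hypothetical ir() and REF_NIL stand-ins for the C IR() macro and constant:

  local function callx_nargs(ir, op1, REF_NIL)
    local nargs = 0
    if op1 ~= REF_NIL then
      local a = ir(op1)        -- last argument or last CARG link
      nargs = 1
      while a.o == "CARG" do   -- each CARG links in one more argument
        nargs = nargs + 1
        a = ir(a.op1)
      end
    end
    return nargs
  end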

src/lj_crecord.c  (+68 -3)

@@ -670,14 +670,79 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
   }
 }
 
+/* Record argument conversions. */
+static TRef crec_call_args(jit_State *J, RecordFFData *rd,
+			   CTState *cts, CType *ct)
+{
+  TRef args[CCI_NARGS_MAX];
+  MSize i, n;
+  TRef tr;
+  args[0] = TREF_NIL;
+  for (n = 0; J->base[n+1]; n++) {
+    CType *d;
+    do {
+      if (!ct->sib)
+	lj_trace_err(J, LJ_TRERR_NYICALL);
+      ct = ctype_get(cts, ct->sib);
+    } while (ctype_isattrib(ct->info));
+    if (!ctype_isfield(ct->info))
+      lj_trace_err(J, LJ_TRERR_NYICALL);
+    d = ctype_rawchild(cts, ct);
+    if (ctype_isenum(d->info)) d = ctype_child(cts, d);
+    if (!(ctype_isnum(d->info) || ctype_isptr(d->info)))
+      lj_trace_err(J, LJ_TRERR_NYICALL);
+    args[n] = crec_ct_tv(J, d, 0, J->base[n+1], &rd->argv[n+1]);
+  }
+  tr = args[0];
+  for (i = 1; i < n; i++)
+    tr = emitir(IRT(IR_CARG, IRT_NIL), tr, args[i]);
+  return tr;
+}
+
+/* Record function call. */
+static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CType *ct = ctype_raw(cts, cd->typeid);
+  IRType tp = IRT_PTR;
+  if (ctype_isptr(ct->info)) {
+    tp = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32;
+    ct = ctype_rawchild(cts, ct);
+  }
+  if (ctype_isfunc(ct->info)) {
+    TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR);
+    CType *ctr = ctype_rawchild(cts, ct);
+    IRType t = crec_ct2irt(ctr);
+    TRef tr;
+    if (ctype_isenum(ctr->info)) ctr = ctype_child(cts, ctr);
+    if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info)) ||
+	ctype_isbool(ctr->info) || (ct->info & CTF_VARARG) ||
+#if LJ_TARGET_X86
+	ctype_cconv(ct->info) != CTCC_CDECL ||
+#endif
+	t == IRT_CDATA || (LJ_32 && (t == IRT_I64 || t == IRT_U64)))
+      lj_trace_err(J, LJ_TRERR_NYICALL);
+    tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func);
+    if (t == IRT_FLOAT || t == IRT_U32) {
+      tr = emitconv(tr, IRT_NUM, t, 0);
+    } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) ||
+	       (LJ_64 && (t == IRT_I64 || t == IRT_U64))) {
+      TRef trid = lj_ir_kint(J, ctype_cid(ct->info));
+      tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr);
+    }
+    J->base[0] = tr;
+    return 1;
+  }
+  return 0;
+}
+
 void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
 {
   GCcdata *cd = argv2cdata(J, J->base[0], &rd->argv[0]);
-  if (cd->typeid == CTID_CTYPEID) {
+  if (cd->typeid == CTID_CTYPEID)
     crec_alloc(J, rd, crec_constructor(J, cd, J->base[0]));
-  } else {
+  else if (!crec_call(J, rd, cd))
     lj_trace_err(J, LJ_TRERR_BADTYPE);
-  }
 }
 
 static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
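
The recorder is deliberately conservative: crec_call_args admits only numeric and pointer arguments (resolving enums to their underlying type), and crec_call additionally rejects bool results, varargs, non-cdecl conventions on x86, and 64 bit results on 32 bit targets, raising the new NYICALL trace error in each case. Float and u32 results are widened to numbers, pointer results boxed via CNEWI. A hedged sketch of both sides, assuming a standard C library declaration:

  local ffi = require("ffi")
  ffi.cdef[[
  char *strchr(const char *s, int c);
  ]]
  -- Pointer result: recorded as CALLXS, then boxed to cdata via CNEWI.
  local p = ffi.C.strchr("hello", 108)
  -- A function returning a struct by value would instead abort the trace
  -- with "NYI: unsupported C function type".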

src/lj_ir.h  (+2 -2)

@@ -129,6 +129,7 @@
   _(CALLN,	N , ref, lit) \
   _(CALLL,	L , ref, lit) \
   _(CALLS,	S , ref, lit) \
+  _(CALLXS,	S , ref, ref) \
   _(CARG,	N , ref, ref) \
   \
   /* End of list. */
@@ -236,7 +237,7 @@ typedef struct CCallInfo {
 } CCallInfo;
 
 #define CCI_NARGS(ci)		((ci)->flags & 0xff)	/* Extract # of args. */
-#define CCI_NARGS_MAX		16		/* Max. # of args. */
+#define CCI_NARGS_MAX		32			/* Max. # of args. */
 
 #define CCI_OTSHIFT		16
 #define CCI_OPTYPE(ci)		((ci)->flags >> CCI_OTSHIFT)  /* Get op/type. */
@@ -590,7 +591,6 @@ typedef union IRIns {
   check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
 #define ir_kptr(ir) \
   check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
-
 LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);
 
 /* A store or any other op with a non-weak guard has a side-effect. */

src/lj_opt_fold.c  (+1 -0)

@@ -1891,6 +1891,7 @@ LJFOLDX(lj_opt_dse_xstore)
 LJFOLD(NEWREF any any)  /* Treated like a store. */
 LJFOLD(CALLS any any)
 LJFOLD(CALLL any any)  /* Safeguard fallback. */
+LJFOLD(CALLXS any any)
 LJFOLD(RETF any any)  /* Modifies BASE. */
 LJFOLD(TNEW any any)
 LJFOLD(TDUP any)

src/lj_opt_mem.c  (+5 -2)

@@ -675,7 +675,8 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
   /* Search for conflicting stores. */
   ref = J->chain[IR_XSTORE];
 retry:
-  while (ref > xref) {
+  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
+  while (ref > lim) {
     IRIns *store = IR(ref);
     switch (aa_xref(J, xr, fins, store)) {
     case ALIAS_NO:   break;  /* Continue searching. */
@@ -732,10 +733,12 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
 {
   IRRef xref = fins->op1;
   IRIns *xr = IR(xref);
+  IRRef lim = xref;  /* Search limit. */
   IRRef val = fins->op2;  /* Stored value reference. */
   IRRef1 *refp = &J->chain[IR_XSTORE];
   IRRef ref = *refp;
-  while (ref > xref) {  /* Search for redundant or conflicting stores. */
+  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
+  while (ref > lim) {  /* Search for redundant or conflicting stores. */
     IRIns *store = IR(ref);
     switch (aa_xref(J, xr, fins, store)) {
     case ALIAS_NO:
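
Together with the lj_opt_fold.c change above, this treats the most recent IR_CALLXS as a barrier for memory optimizations: an arbitrary C function may read or write any memory, so XLOADs must not be forwarded across it and XSTOREs before it cannot be proven dead. A small Lua illustration of why, with a hypothetical function bar for the sake of the example:

  local ffi = require("ffi")
  ffi.cdef[[void bar(int *p);]]  -- hypothetical, for illustration
  local buf = ffi.new("int[1]")
  buf[0] = 1         -- XSTORE: not dead, bar() may read it
  ffi.C.bar(buf)     -- CALLXS: may also overwrite buf[0]
  local v = buf[0]   -- XLOAD: cannot reuse the value stored above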

src/lj_traceerr.h  (+1 -0)

@@ -37,6 +37,7 @@ TREDEF(NYITMIX,	"NYI: mixed sparse/dense table")
 /* Recording C data operations. */
 TREDEF(NOCACHE,	"symbol not in cache")
 TREDEF(NYICONV,	"NYI: unsupported C type conversion")
+TREDEF(NYICALL,	"NYI: unsupported C function type")
 
 /* Optimizations. */
 TREDEF(GFAIL,	"guard would always fail")