lj_snap.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865
  1. /*
  2. ** Snapshot handling.
  3. ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
  4. */
  5. #define lj_snap_c
  6. #define LUA_CORE
  7. #include "lj_obj.h"
  8. #if LJ_HASJIT
  9. #include "lj_gc.h"
  10. #include "lj_tab.h"
  11. #include "lj_state.h"
  12. #include "lj_frame.h"
  13. #include "lj_bc.h"
  14. #include "lj_ir.h"
  15. #include "lj_jit.h"
  16. #include "lj_iropt.h"
  17. #include "lj_trace.h"
  18. #include "lj_snap.h"
  19. #include "lj_target.h"
  20. #if LJ_HASFFI
  21. #include "lj_ctype.h"
  22. #include "lj_cdata.h"
  23. #endif
  24. /* Some local macros to save typing. Undef'd at the end. */
  25. #define IR(ref) (&J->cur.ir[(ref)])
  26. /* Pass IR on to next optimization in chain (FOLD). */
  27. #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
  28. /* Emit raw IR without passing through optimizations. */
  29. #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
  30. /* -- Snapshot buffer allocation ------------------------------------------ */
  31. /* Grow snapshot buffer. */
  32. void lj_snap_grow_buf_(jit_State *J, MSize need)
  33. {
  34. MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  35. if (need > maxsnap)
  36. lj_trace_err(J, LJ_TRERR_SNAPOV);
  37. lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  38. J->cur.snap = J->snapbuf;
  39. }
  40. /* Grow snapshot map buffer. */
  41. void lj_snap_grow_map_(jit_State *J, MSize need)
  42. {
  43. if (need < 2*J->sizesnapmap)
  44. need = 2*J->sizesnapmap;
  45. else if (need < 64)
  46. need = 64;
  47. J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
  48. J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  49. J->cur.snapmap = J->snapmapbuf;
  50. J->sizesnapmap = need;
  51. }
  52. /* -- Snapshot generation ------------------------------------------------- */
  53. /* Add all modified slots to the snapshot. */
  54. static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
  55. {
  56. IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */
  57. BCReg s;
  58. MSize n = 0;
  59. for (s = 0; s < nslots; s++) {
  60. TRef tr = J->slot[s];
  61. IRRef ref = tref_ref(tr);
  62. if (ref) {
  63. SnapEntry sn = SNAP_TR(s, tr);
  64. IRIns *ir = IR(ref);
  65. if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
  66. ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
  67. /* No need to snapshot unmodified non-inherited slots. */
  68. if (!(ir->op2 & IRSLOAD_INHERIT))
  69. continue;
  70. /* No need to restore readonly slots and unmodified non-parent slots. */
  71. if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
  72. (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
  73. sn |= SNAP_NORESTORE;
  74. }
  75. if (LJ_SOFTFP && irt_isnum(ir->t))
  76. sn |= SNAP_SOFTFPNUM;
  77. map[n++] = sn;
  78. }
  79. }
  80. return n;
  81. }
  82. /* Add frame links at the end of the snapshot. */
  83. static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
  84. {
  85. cTValue *frame = J->L->base - 1;
  86. cTValue *lim = J->L->base - J->baseslot;
  87. cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
  88. MSize f = 0;
  89. map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
  90. while (frame > lim) { /* Backwards traversal of all frames above base. */
  91. if (frame_islua(frame)) {
  92. map[f++] = SNAP_MKPC(frame_pc(frame));
  93. frame = frame_prevl(frame);
  94. if (frame + funcproto(frame_func(frame))->framesize > ftop)
  95. ftop = frame + funcproto(frame_func(frame))->framesize;
  96. } else if (frame_iscont(frame)) {
  97. map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
  98. map[f++] = SNAP_MKPC(frame_contpc(frame));
  99. frame = frame_prevd(frame);
  100. } else {
  101. lua_assert(!frame_isc(frame));
  102. map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
  103. frame = frame_prevd(frame);
  104. }
  105. }
  106. lua_assert(f == (MSize)(1 + J->framedepth));
  107. return (BCReg)(ftop - lim);
  108. }
  109. /* Take a snapshot of the current stack. */
  110. static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
  111. {
  112. BCReg nslots = J->baseslot + J->maxslot;
  113. MSize nent;
  114. SnapEntry *p;
  115. /* Conservative estimate. */
  116. lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  117. p = &J->cur.snapmap[nsnapmap];
  118. nent = snapshot_slots(J, p, nslots);
  119. snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  120. snap->mapofs = (uint16_t)nsnapmap;
  121. snap->ref = (IRRef1)J->cur.nins;
  122. snap->nent = (uint8_t)nent;
  123. snap->nslots = (uint8_t)nslots;
  124. snap->count = 0;
  125. J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
  126. }
  127. /* Add or merge a snapshot. */
  128. void lj_snap_add(jit_State *J)
  129. {
  130. MSize nsnap = J->cur.nsnap;
  131. MSize nsnapmap = J->cur.nsnapmap;
  132. /* Merge if no ins. inbetween or if requested and no guard inbetween. */
  133. if (J->mergesnap ? !irt_isguard(J->guardemit) :
  134. (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
  135. if (nsnap == 1) { /* But preserve snap #0 PC. */
  136. emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
  137. goto nomerge;
  138. }
  139. nsnapmap = J->cur.snap[--nsnap].mapofs;
  140. } else {
  141. nomerge:
  142. lj_snap_grow_buf(J, nsnap+1);
  143. J->cur.nsnap = (uint16_t)(nsnap+1);
  144. }
  145. J->mergesnap = 0;
  146. J->guardemit.irt = 0;
  147. snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
  148. }
  149. /* -- Snapshot modification ----------------------------------------------- */
  150. #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
  151. /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
  152. static BCReg snap_usedef(jit_State *J, uint8_t *udf,
  153. const BCIns *pc, BCReg maxslot)
  154. {
  155. BCReg s;
  156. GCobj *o;
  157. if (maxslot == 0) return 0;
  158. #ifdef LUAJIT_USE_VALGRIND
  159. /* Avoid errors for harmless reads beyond maxslot. */
  160. memset(udf, 1, SNAP_USEDEF_SLOTS);
  161. #else
  162. memset(udf, 1, maxslot);
  163. #endif
  164. /* Treat open upvalues as used. */
  165. o = gcref(J->L->openupval);
  166. while (o) {
  167. if (uvval(gco2uv(o)) < J->L->base) break;
  168. udf[uvval(gco2uv(o)) - J->L->base] = 0;
  169. o = gcref(o->gch.nextgc);
  170. }
  171. #define USE_SLOT(s) udf[(s)] &= ~1
  172. #define DEF_SLOT(s) udf[(s)] *= 3
  173. /* Scan through following bytecode and check for uses/defs. */
  174. lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  175. for (;;) {
  176. BCIns ins = *pc++;
  177. BCOp op = bc_op(ins);
  178. switch (bcmode_b(op)) {
  179. case BCMvar: USE_SLOT(bc_b(ins)); break;
  180. default: break;
  181. }
  182. switch (bcmode_c(op)) {
  183. case BCMvar: USE_SLOT(bc_c(ins)); break;
  184. case BCMrbase:
  185. lua_assert(op == BC_CAT);
  186. for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
  187. for (; s < maxslot; s++) DEF_SLOT(s);
  188. break;
  189. case BCMjump:
  190. handle_jump: {
  191. BCReg minslot = bc_a(ins);
  192. if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
  193. else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
  194. else if (op == BC_UCLO) { pc += bc_j(ins); break; }
  195. for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
  196. return minslot < maxslot ? minslot : maxslot;
  197. }
  198. case BCMlit:
  199. if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
  200. goto handle_jump;
  201. } else if (bc_isret(op)) {
  202. BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
  203. for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
  204. for (; s < top; s++) USE_SLOT(s);
  205. for (; s < maxslot; s++) DEF_SLOT(s);
  206. return 0;
  207. }
  208. break;
  209. case BCMfunc: return maxslot; /* NYI: will abort, anyway. */
  210. default: break;
  211. }
  212. switch (bcmode_a(op)) {
  213. case BCMvar: USE_SLOT(bc_a(ins)); break;
  214. case BCMdst:
  215. if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
  216. break;
  217. case BCMbase:
  218. if (op >= BC_CALLM && op <= BC_VARG) {
  219. BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
  220. maxslot : (bc_a(ins) + bc_c(ins));
  221. s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
  222. for (; s < top; s++) USE_SLOT(s);
  223. for (; s < maxslot; s++) DEF_SLOT(s);
  224. if (op == BC_CALLT || op == BC_CALLMT) {
  225. for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
  226. return 0;
  227. }
  228. } else if (op == BC_KNIL) {
  229. for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
  230. } else if (op == BC_TSETM) {
  231. for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
  232. }
  233. break;
  234. default: break;
  235. }
  236. lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  237. }
  238. #undef USE_SLOT
  239. #undef DEF_SLOT
  240. return 0; /* unreachable */
  241. }
  242. /* Purge dead slots before the next snapshot. */
  243. void lj_snap_purge(jit_State *J)
  244. {
  245. uint8_t udf[SNAP_USEDEF_SLOTS];
  246. BCReg maxslot = J->maxslot;
  247. BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  248. for (; s < maxslot; s++)
  249. if (udf[s] != 0)
  250. J->base[s] = 0; /* Purge dead slots. */
  251. }
  252. /* Shrink last snapshot. */
  253. void lj_snap_shrink(jit_State *J)
  254. {
  255. SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  256. SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  257. MSize n, m, nlim, nent = snap->nent;
  258. uint8_t udf[SNAP_USEDEF_SLOTS];
  259. BCReg maxslot = J->maxslot;
  260. BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  261. BCReg baseslot = J->baseslot;
  262. maxslot += baseslot;
  263. minslot += baseslot;
  264. snap->nslots = (uint8_t)maxslot;
  265. for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */
  266. BCReg s = snap_slot(map[n]);
  267. if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
  268. map[m++] = map[n]; /* Only copy used slots. */
  269. }
  270. snap->nent = (uint8_t)m;
  271. nlim = J->cur.nsnapmap - snap->mapofs - 1;
  272. while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
  273. J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
  274. }
  275. /* -- Snapshot access ----------------------------------------------------- */
  276. /* Initialize a Bloom Filter with all renamed refs.
  277. ** There are very few renames (often none), so the filter has
  278. ** very few bits set. This makes it suitable for negative filtering.
  279. */
  280. static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
  281. {
  282. BloomFilter rfilt = 0;
  283. IRIns *ir;
  284. for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
  285. if (ir->op2 <= lim)
  286. bloomset(rfilt, ir->op1);
  287. return rfilt;
  288. }
  289. /* Process matching renames to find the original RegSP. */
  290. static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
  291. {
  292. IRIns *ir;
  293. for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
  294. if (ir->op1 == ref && ir->op2 <= lim)
  295. rs = ir->prev;
  296. return rs;
  297. }
  298. /* Copy RegSP from parent snapshot to the parent links of the IR. */
  299. IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
  300. {
  301. SnapShot *snap = &T->snap[snapno];
  302. SnapEntry *map = &T->snapmap[snap->mapofs];
  303. BloomFilter rfilt = snap_renamefilter(T, snapno);
  304. MSize n = 0;
  305. IRRef ref = 0;
  306. for ( ; ; ir++) {
  307. uint32_t rs;
  308. if (ir->o == IR_SLOAD) {
  309. if (!(ir->op2 & IRSLOAD_PARENT)) break;
  310. for ( ; ; n++) {
  311. lua_assert(n < snap->nent);
  312. if (snap_slot(map[n]) == ir->op1) {
  313. ref = snap_ref(map[n++]);
  314. break;
  315. }
  316. }
  317. } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
  318. ref++;
  319. } else if (ir->o == IR_PVAL) {
  320. ref = ir->op1 + REF_BIAS;
  321. } else {
  322. break;
  323. }
  324. rs = T->ir[ref].prev;
  325. if (bloomtest(rfilt, ref))
  326. rs = snap_renameref(T, snapno, ref, rs);
  327. ir->prev = (uint16_t)rs;
  328. lua_assert(regsp_used(rs));
  329. }
  330. return ir;
  331. }
  332. /* -- Snapshot replay ----------------------------------------------------- */
  333. /* Replay constant from parent trace. */
  334. static TRef snap_replay_const(jit_State *J, IRIns *ir)
  335. {
  336. /* Only have to deal with constants that can occur in stack slots. */
  337. switch ((IROp)ir->o) {
  338. case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  339. case IR_KINT: return lj_ir_kint(J, ir->i);
  340. case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  341. case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
  342. case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
  343. case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
  344. default: lua_assert(0); return TREF_NIL; break;
  345. }
  346. }
  347. /* De-duplicate parent reference. */
  348. static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
  349. {
  350. MSize j;
  351. for (j = 0; j < nmax; j++)
  352. if (snap_ref(map[j]) == ref)
  353. return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  354. return 0;
  355. }
  356. /* Emit parent reference with de-duplication. */
  357. static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
  358. BloomFilter seen, IRRef ref)
  359. {
  360. IRIns *ir = &T->ir[ref];
  361. TRef tr;
  362. if (irref_isk(ref))
  363. tr = snap_replay_const(J, ir);
  364. else if (!regsp_used(ir->prev))
  365. tr = 0;
  366. else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
  367. tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  368. return tr;
  369. }
  370. /* Check whether a sunk store corresponds to an allocation. Slow path. */
  371. static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
  372. {
  373. if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
  374. irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
  375. IRIns *irk = IR(irs->op1);
  376. if (irk->o == IR_AREF || irk->o == IR_HREFK)
  377. irk = IR(irk->op1);
  378. return (IR(irk->op1) == ira);
  379. }
  380. return 0;
  381. }
  382. /* Check whether a sunk store corresponds to an allocation. Fast path. */
  383. static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
  384. {
  385. if (irs->s != 255)
  386. return (ira + irs->s == irs); /* Fast check. */
  387. return snap_sunk_store2(J, ira, irs);
  388. }
  389. /* Replay snapshot state to setup side trace. */
  390. void lj_snap_replay(jit_State *J, GCtrace *T)
  391. {
  392. SnapShot *snap = &T->snap[J->exitno];
  393. SnapEntry *map = &T->snapmap[snap->mapofs];
  394. MSize n, nent = snap->nent;
  395. BloomFilter seen = 0;
  396. int pass23 = 0;
  397. J->framedepth = 0;
  398. /* Emit IR for slots inherited from parent snapshot. */
  399. for (n = 0; n < nent; n++) {
  400. SnapEntry sn = map[n];
  401. BCReg s = snap_slot(sn);
  402. IRRef ref = snap_ref(sn);
  403. IRIns *ir = &T->ir[ref];
  404. TRef tr;
  405. /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
  406. if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
  407. goto setslot;
  408. bloomset(seen, ref);
  409. if (irref_isk(ref)) {
  410. tr = snap_replay_const(J, ir);
  411. } else if (!regsp_used(ir->prev)) {
  412. pass23 = 1;
  413. lua_assert(s != 0);
  414. tr = s;
  415. } else {
  416. IRType t = irt_type(ir->t);
  417. uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
  418. if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
  419. if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
  420. tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
  421. }
  422. setslot:
  423. J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
  424. J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
  425. if ((sn & SNAP_FRAME))
  426. J->baseslot = s+1;
  427. }
  428. if (pass23) {
  429. IRIns *irlast = &T->ir[snap->ref];
  430. pass23 = 0;
  431. /* Emit dependent PVALs. */
  432. for (n = 0; n < nent; n++) {
  433. SnapEntry sn = map[n];
  434. IRRef refp = snap_ref(sn);
  435. IRIns *ir = &T->ir[refp];
  436. if (regsp_reg(ir->r) == RID_SUNK) {
  437. if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
  438. pass23 = 1;
  439. lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
  440. ir->o == IR_CNEW || ir->o == IR_CNEWI);
  441. if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
  442. if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
  443. if (LJ_HASFFI && ir->o == IR_CNEWI) {
  444. if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
  445. snap_pref(J, T, map, nent, seen, (ir+1)->op2);
  446. } else {
  447. IRIns *irs;
  448. for (irs = ir+1; irs < irlast; irs++)
  449. if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  450. if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
  451. snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
  452. else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
  453. irs+1 < irlast && (irs+1)->o == IR_HIOP)
  454. snap_pref(J, T, map, nent, seen, (irs+1)->op2);
  455. }
  456. }
  457. } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
  458. lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
  459. J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
  460. }
  461. }
  462. /* Replay sunk instructions. */
  463. for (n = 0; pass23 && n < nent; n++) {
  464. SnapEntry sn = map[n];
  465. IRRef refp = snap_ref(sn);
  466. IRIns *ir = &T->ir[refp];
  467. if (regsp_reg(ir->r) == RID_SUNK) {
  468. TRef op1, op2;
  469. if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
  470. J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
  471. continue;
  472. }
  473. op1 = ir->op1;
  474. if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
  475. op2 = ir->op2;
  476. if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
  477. if (LJ_HASFFI && ir->o == IR_CNEWI) {
  478. if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
  479. lj_needsplit(J); /* Emit joining HIOP. */
  480. op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
  481. snap_pref(J, T, map, nent, seen, (ir+1)->op2));
  482. }
  483. J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
  484. } else {
  485. IRIns *irs;
  486. TRef tr = emitir(ir->ot, op1, op2);
  487. J->slot[snap_slot(sn)] = tr;
  488. for (irs = ir+1; irs < irlast; irs++)
  489. if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  490. IRIns *irr = &T->ir[irs->op1];
  491. TRef val, key = irr->op2, tmp = tr;
  492. if (irr->o != IR_FREF) {
  493. IRIns *irk = &T->ir[key];
  494. if (irr->o == IR_HREFK)
  495. key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
  496. irk->op2);
  497. else
  498. key = snap_replay_const(J, irk);
  499. if (irr->o == IR_HREFK || irr->o == IR_AREF) {
  500. IRIns *irf = &T->ir[irr->op1];
  501. tmp = emitir(irf->ot, tmp, irf->op2);
  502. }
  503. }
  504. tmp = emitir(irr->ot, tmp, key);
  505. val = snap_pref(J, T, map, nent, seen, irs->op2);
  506. if (val == 0) {
  507. IRIns *irc = &T->ir[irs->op2];
  508. lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
  509. val = snap_pref(J, T, map, nent, seen, irc->op1);
  510. val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
  511. } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
  512. irs+1 < irlast && (irs+1)->o == IR_HIOP) {
  513. IRType t = IRT_I64;
  514. if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
  515. t = IRT_NUM;
  516. lj_needsplit(J);
  517. if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
  518. uint64_t k = (uint32_t)T->ir[irs->op2].i +
  519. ((uint64_t)T->ir[(irs+1)->op2].i << 32);
  520. val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
  521. lj_ir_k64_find(J, k));
  522. } else {
  523. val = emitir_raw(IRT(IR_HIOP, t), val,
  524. snap_pref(J, T, map, nent, seen, (irs+1)->op2));
  525. }
  526. tmp = emitir(IRT(irs->o, t), tmp, val);
  527. continue;
  528. }
  529. tmp = emitir(irs->ot, tmp, val);
  530. } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
  531. emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
  532. }
  533. }
  534. }
  535. }
  536. }
  537. J->base = J->slot + J->baseslot;
  538. J->maxslot = snap->nslots - J->baseslot;
  539. lj_snap_add(J);
  540. if (pass23) /* Need explicit GC step _after_ initial snapshot. */
  541. emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
  542. }
  543. /* -- Snapshot restore ---------------------------------------------------- */
  544. static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
  545. SnapNo snapno, BloomFilter rfilt,
  546. IRIns *ir, TValue *o);
  547. /* Restore a value from the trace exit state. */
  548. static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
  549. SnapNo snapno, BloomFilter rfilt,
  550. IRRef ref, TValue *o)
  551. {
  552. IRIns *ir = &T->ir[ref];
  553. IRType1 t = ir->t;
  554. RegSP rs = ir->prev;
  555. if (irref_isk(ref)) { /* Restore constant slot. */
  556. lj_ir_kvalue(J->L, o, ir);
  557. return;
  558. }
  559. if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
  560. rs = snap_renameref(T, snapno, ref, rs);
  561. if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
  562. int32_t *sps = &ex->spill[regsp_spill(rs)];
  563. if (irt_isinteger(t)) {
  564. setintV(o, *sps);
  565. #if !LJ_SOFTFP
  566. } else if (irt_isnum(t)) {
  567. o->u64 = *(uint64_t *)sps;
  568. #endif
  569. } else if (LJ_64 && irt_islightud(t)) {
  570. /* 64 bit lightuserdata which may escape already has the tag bits. */
  571. o->u64 = *(uint64_t *)sps;
  572. } else {
  573. lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
  574. setgcrefi(o->gcr, *sps);
  575. setitype(o, irt_toitype(t));
  576. }
  577. } else { /* Restore from register. */
  578. Reg r = regsp_reg(rs);
  579. if (ra_noreg(r)) {
  580. lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
  581. snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
  582. if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
  583. return;
  584. } else if (irt_isinteger(t)) {
  585. setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
  586. #if !LJ_SOFTFP
  587. } else if (irt_isnum(t)) {
  588. setnumV(o, ex->fpr[r-RID_MIN_FPR]);
  589. #endif
  590. } else if (LJ_64 && irt_islightud(t)) {
  591. /* 64 bit lightuserdata which may escape already has the tag bits. */
  592. o->u64 = ex->gpr[r-RID_MIN_GPR];
  593. } else {
  594. if (!irt_ispri(t))
  595. setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
  596. setitype(o, irt_toitype(t));
  597. }
  598. }
  599. }
  600. #if LJ_HASFFI
  601. /* Restore raw data from the trace exit state. */
  602. static void snap_restoredata(GCtrace *T, ExitState *ex,
  603. SnapNo snapno, BloomFilter rfilt,
  604. IRRef ref, void *dst, CTSize sz)
  605. {
  606. IRIns *ir = &T->ir[ref];
  607. RegSP rs = ir->prev;
  608. int32_t *src;
  609. uint64_t tmp;
  610. if (irref_isk(ref)) {
  611. if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
  612. src = mref(ir->ptr, int32_t);
  613. } else if (sz == 8) {
  614. tmp = (uint64_t)(uint32_t)ir->i;
  615. src = (int32_t *)&tmp;
  616. } else {
  617. src = &ir->i;
  618. }
  619. } else {
  620. if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
  621. rs = snap_renameref(T, snapno, ref, rs);
  622. if (ra_hasspill(regsp_spill(rs))) {
  623. src = &ex->spill[regsp_spill(rs)];
  624. if (sz == 8 && !irt_is64(ir->t)) {
  625. tmp = (uint64_t)(uint32_t)*src;
  626. src = (int32_t *)&tmp;
  627. }
  628. } else {
  629. Reg r = regsp_reg(rs);
  630. if (ra_noreg(r)) {
  631. /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
  632. lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
  633. snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
  634. *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
  635. return;
  636. }
  637. src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
  638. #if !LJ_SOFTFP
  639. if (r >= RID_MAX_GPR) {
  640. src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
  641. #if LJ_TARGET_PPC
  642. if (sz == 4) { /* PPC FPRs are always doubles. */
  643. *(float *)dst = (float)*(double *)src;
  644. return;
  645. }
  646. #else
  647. if (LJ_BE && sz == 4) src++;
  648. #endif
  649. }
  650. #endif
  651. }
  652. }
  653. lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  654. if (sz == 4) *(int32_t *)dst = *src;
  655. else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  656. else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  657. else *(int16_t *)dst = (int16_t)*src;
  658. }
  659. #endif
  660. /* Unsink allocation from the trace exit state. Unsink sunk stores. */
  661. static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
  662. SnapNo snapno, BloomFilter rfilt,
  663. IRIns *ir, TValue *o)
  664. {
  665. lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
  666. ir->o == IR_CNEW || ir->o == IR_CNEWI);
  667. #if LJ_HASFFI
  668. if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
  669. CTState *cts = ctype_cts(J->L);
  670. CTypeID id = (CTypeID)T->ir[ir->op1].i;
  671. CTSize sz = lj_ctype_size(cts, id);
  672. GCcdata *cd = lj_cdata_new(cts, id, sz);
  673. setcdataV(J->L, o, cd);
  674. if (ir->o == IR_CNEWI) {
  675. uint8_t *p = (uint8_t *)cdataptr(cd);
  676. lua_assert(sz == 4 || sz == 8);
  677. if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
  678. snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
  679. if (LJ_BE) p += 4;
  680. sz = 4;
  681. }
  682. snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
  683. } else {
  684. IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
  685. for (irs = ir+1; irs < irlast; irs++)
  686. if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  687. IRIns *iro = &T->ir[T->ir[irs->op1].op2];
  688. uint8_t *p = (uint8_t *)cd;
  689. CTSize szs;
  690. lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
  691. lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
  692. if (irt_is64(irs->t)) szs = 8;
  693. else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
  694. else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
  695. else szs = 4;
  696. if (LJ_64 && iro->o == IR_KINT64)
  697. p += (int64_t)ir_k64(iro)->u64;
  698. else
  699. p += iro->i;
  700. lua_assert(p >= (uint8_t *)cdataptr(cd) &&
  701. p + szs <= (uint8_t *)cdataptr(cd) + sz);
  702. if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
  703. lua_assert(szs == 4);
  704. snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
  705. if (LJ_BE) p += 4;
  706. }
  707. snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
  708. }
  709. }
  710. } else
  711. #endif
  712. {
  713. IRIns *irs, *irlast;
  714. GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
  715. lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
  716. settabV(J->L, o, t);
  717. irlast = &T->ir[T->snap[snapno].ref];
  718. for (irs = ir+1; irs < irlast; irs++)
  719. if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
  720. IRIns *irk = &T->ir[irs->op1];
  721. TValue tmp, *val;
  722. lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
  723. irs->o == IR_FSTORE);
  724. if (irk->o == IR_FREF) {
  725. lua_assert(irk->op2 == IRFL_TAB_META);
  726. snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
  727. /* NOBARRIER: The table is new (marked white). */
  728. setgcref(t->metatable, obj2gco(tabV(&tmp)));
  729. } else {
  730. irk = &T->ir[irk->op2];
  731. if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
  732. lj_ir_kvalue(J->L, &tmp, irk);
  733. val = lj_tab_set(J->L, t, &tmp);
  734. /* NOBARRIER: The table is new (marked white). */
  735. snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
  736. if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
  737. snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
  738. val->u32.hi = tmp.u32.lo;
  739. }
  740. }
  741. }
  742. }
  743. }
  744. /* Restore interpreter state from exit state with the help of a snapshot. */
  745. const BCIns *lj_snap_restore(jit_State *J, void *exptr)
  746. {
  747. ExitState *ex = (ExitState *)exptr;
  748. SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
  749. GCtrace *T = traceref(J, J->parent);
  750. SnapShot *snap = &T->snap[snapno];
  751. MSize n, nent = snap->nent;
  752. SnapEntry *map = &T->snapmap[snap->mapofs];
  753. SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  754. int32_t ftsz0;
  755. TValue *frame;
  756. BloomFilter rfilt = snap_renamefilter(T, snapno);
  757. const BCIns *pc = snap_pc(map[nent]);
  758. lua_State *L = J->L;
  759. /* Set interpreter PC to the next PC to get correct error messages. */
  760. setcframe_pc(cframe_raw(L->cframe), pc+1);
  761. /* Make sure the stack is big enough for the slots from the snapshot. */
  762. if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
  763. L->top = curr_topL(L);
  764. lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  765. }
  766. /* Fill stack slots with data from the registers and spill slots. */
  767. frame = L->base-1;
  768. ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
  769. for (n = 0; n < nent; n++) {
  770. SnapEntry sn = map[n];
  771. if (!(sn & SNAP_NORESTORE)) {
  772. TValue *o = &frame[snap_slot(sn)];
  773. IRRef ref = snap_ref(sn);
  774. IRIns *ir = &T->ir[ref];
  775. if (ir->r == RID_SUNK) {
  776. MSize j;
  777. for (j = 0; j < n; j++)
  778. if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
  779. copyTV(L, o, &frame[snap_slot(map[j])]);
  780. goto dupslot;
  781. }
  782. snap_unsink(J, T, ex, snapno, rfilt, ir, o);
  783. dupslot:
  784. continue;
  785. }
  786. snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
  787. if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
  788. TValue tmp;
  789. snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
  790. o->u32.hi = tmp.u32.lo;
  791. } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
  792. /* Overwrite tag with frame link. */
  793. o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
  794. L->base = o+1;
  795. }
  796. }
  797. }
  798. lua_assert(map + nent == flinks);
  799. /* Compute current stack top. */
  800. switch (bc_op(*pc)) {
  801. default:
  802. if (bc_op(*pc) < BC_FUNCF) {
  803. L->top = curr_topL(L);
  804. break;
  805. }
  806. /* fallthrough */
  807. case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
  808. L->top = frame + snap->nslots;
  809. break;
  810. }
  811. return pc;
  812. }
  813. #undef IR
  814. #undef emitir_raw
  815. #undef emitir
  816. #endif