lj_snap.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029
  1. /*
  2. ** Snapshot handling.
  3. ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
  4. */
  5. #define lj_snap_c
  6. #define LUA_CORE
  7. #include "lj_obj.h"
  8. #if LJ_HASJIT
  9. #include "lj_gc.h"
  10. #include "lj_tab.h"
  11. #include "lj_state.h"
  12. #include "lj_frame.h"
  13. #include "lj_bc.h"
  14. #include "lj_ir.h"
  15. #include "lj_jit.h"
  16. #include "lj_iropt.h"
  17. #include "lj_trace.h"
  18. #include "lj_snap.h"
  19. #include "lj_target.h"
  20. #if LJ_HASFFI
  21. #include "lj_ctype.h"
  22. #include "lj_cdata.h"
  23. #endif
  24. /* Pass IR on to next optimization in chain (FOLD). Requires a jit_State *J in the enclosing scope; the macro's value is the result of lj_opt_fold(J). */
  25. #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
  26. /* Emit raw IR without passing through optimizations. Requires a jit_State *J in the enclosing scope; the macro's value is the result of lj_ir_emit(J). */
  27. #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
  28. /* -- Snapshot buffer allocation ------------------------------------------ */
  29. /* Grow snapshot buffer. */
  30. void lj_snap_grow_buf_(jit_State *J, MSize need)
  31. {
  32. MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  33. if (need > maxsnap)
  34. lj_trace_err(J, LJ_TRERR_SNAPOV);
  35. lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  36. J->cur.snap = J->snapbuf;
  37. }
  38. /* Grow snapshot map buffer. */
  39. void lj_snap_grow_map_(jit_State *J, MSize need)
  40. {
  41. if (need < 2*J->sizesnapmap)
  42. need = 2*J->sizesnapmap;
  43. else if (need < 64)
  44. need = 64;
  45. J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
  46. J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  47. J->cur.snapmap = J->snapmapbuf;
  48. J->sizesnapmap = need;
  49. }
  50. /* -- Snapshot generation ------------------------------------------------- */
  51. /* Add all modified slots to the snapshot. */
  52. static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
  53. {
  54. IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */
  55. BCReg s;
  56. MSize n = 0;
  57. for (s = 0; s < nslots; s++) {
  58. TRef tr = J->slot[s];
  59. IRRef ref = tref_ref(tr);
  60. #if LJ_FR2
  61. if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
  62. if ((tr & TREF_FRAME))
  63. map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
  64. continue;
  65. }
  66. if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
  67. cTValue *base = J->L->base - J->baseslot;
  68. tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
  69. ref = tref_ref(tr);
  70. }
  71. #endif
  72. if (ref) {
  73. SnapEntry sn = SNAP_TR(s, tr);
  74. IRIns *ir = &J->cur.ir[ref];
  75. if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
  76. ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
  77. /*
  78. ** No need to snapshot unmodified non-inherited slots.
  79. ** But always snapshot the function below a frame in LJ_FR2 mode.
  80. */
  81. if (!(ir->op2 & IRSLOAD_INHERIT) &&
  82. (!LJ_FR2 || s == 0 || s+1 == nslots ||
  83. !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
  84. continue;
  85. /* No need to restore readonly slots and unmodified non-parent slots. */
  86. if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
  87. (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
  88. sn |= SNAP_NORESTORE;
  89. }
  90. if (LJ_SOFTFP32 && irt_isnum(ir->t))
  91. sn |= SNAP_SOFTFPNUM;
  92. map[n++] = sn;
  93. }
  94. }
  95. return n;
  96. }
  97. /* Add frame links at the end of the snapshot. */
  98. static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
  99. {
  100. cTValue *frame = J->L->base - 1;
  101. cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  102. GCfunc *fn = frame_func(frame);
  103. cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
  104. #if LJ_FR2
  105. uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  106. lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
  107. memcpy(map, &pcbase, sizeof(uint64_t));
  108. #else
  109. MSize f = 0;
  110. map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
  111. #endif
  112. lj_assertJ(!J->pt ||
  113. (J->pc >= proto_bc(J->pt) &&
  114. J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
  115. while (frame > lim) { /* Backwards traversal of all frames above base. */
  116. if (frame_islua(frame)) {
  117. #if !LJ_FR2
  118. map[f++] = SNAP_MKPC(frame_pc(frame));
  119. #endif
  120. frame = frame_prevl(frame);
  121. } else if (frame_iscont(frame)) {
  122. #if !LJ_FR2
  123. map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
  124. map[f++] = SNAP_MKPC(frame_contpc(frame));
  125. #endif
  126. frame = frame_prevd(frame);
  127. } else {
  128. lj_assertJ(!frame_isc(frame), "broken frame chain");
  129. #if !LJ_FR2
  130. map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
  131. #endif
  132. frame = frame_prevd(frame);
  133. continue;
  134. }
  135. if (frame + funcproto(frame_func(frame))->framesize > ftop)
  136. ftop = frame + funcproto(frame_func(frame))->framesize;
  137. }
  138. *topslot = (uint8_t)(ftop - lim);
  139. #if LJ_FR2
  140. lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
  141. return 2;
  142. #else
  143. lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
  144. return f;
  145. #endif
  146. }
  147. /* Take a snapshot of the current stack. */
  148. static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
  149. {
  150. BCReg nslots = J->baseslot + J->maxslot;
  151. MSize nent;
  152. SnapEntry *p;
  153. /* Conservative estimate. */
  154. lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
  155. p = &J->cur.snapmap[nsnapmap];
  156. nent = snapshot_slots(J, p, nslots);
  157. snap->nent = (uint8_t)nent;
  158. nent += snapshot_framelinks(J, p + nent, &snap->topslot);
  159. snap->mapofs = (uint32_t)nsnapmap;
  160. snap->ref = (IRRef1)J->cur.nins;
  161. snap->mcofs = 0;
  162. snap->nslots = (uint8_t)nslots;
  163. snap->count = 0;
  164. J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
  165. }
  166. /* Add or merge a snapshot. */
  167. void lj_snap_add(jit_State *J)
  168. {
  169. MSize nsnap = J->cur.nsnap;
  170. MSize nsnapmap = J->cur.nsnapmap;
  171. /* Merge if no ins. inbetween or if requested and no guard inbetween. */
  172. if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
  173. (J->mergesnap && !irt_isguard(J->guardemit))) {
  174. if (nsnap == 1) { /* But preserve snap #0 PC. */
  175. emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
  176. goto nomerge;
  177. }
  178. nsnapmap = J->cur.snap[--nsnap].mapofs;
  179. } else {
  180. nomerge:
  181. lj_snap_grow_buf(J, nsnap+1);
  182. J->cur.nsnap = (uint16_t)(nsnap+1);
  183. }
  184. J->mergesnap = 0;
  185. J->guardemit.irt = 0;
  186. snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
  187. }
  188. /* -- Snapshot modification ----------------------------------------------- */
  /* Size of the udf[] scratch arrays that lj_snap_purge() and lj_snap_shrink() pass to snap_usedef(). */
  189. #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
  190. /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
  /*
  ** Scans the bytecode starting at 'pc' and classifies slots 0..maxslot-1
  ** in udf[]. Every entry starts out as 1. A use clears bit 0, a definition
  ** multiplies the entry by 3. Consequently an entry is 0 only if the slot
  ** was used before it was defined (or is referenced by an open upvalue),
  ** and once it is 0 it stays 0. All other slots end up non-zero.
  ** Returns a slot number: the callers in this file only inspect udf[] for
  ** slots in [result, maxslot). A result of maxslot means nothing can be
  ** concluded, 0 means all of udf[] is meaningful.
  */
  191. static BCReg snap_usedef(jit_State *J, uint8_t *udf,
  192. const BCIns *pc, BCReg maxslot)
  193. {
  194. BCReg s;
  195. GCobj *o;
  196. if (maxslot == 0) return 0;
  197. #ifdef LUAJIT_USE_VALGRIND
  198. /* Avoid errors for harmless reads beyond maxslot. */
  199. memset(udf, 1, SNAP_USEDEF_SLOTS);
  200. #else
  201. memset(udf, 1, maxslot);
  202. #endif
  203. /* Treat open upvalues as used. */
  /* The walk stops at the first open upvalue that points below L->base. */
  204. o = gcref(J->L->openupval);
  205. while (o) {
  206. if (uvval(gco2uv(o)) < J->L->base) break;
  207. udf[uvval(gco2uv(o)) - J->L->base] = 0;
  208. o = gcref(o->gch.nextgc);
  209. }
  /* USE clears bit 0 (1 -> 0). DEF multiplies by 3 (1 -> 3, but 0 stays 0). */
  210. #define USE_SLOT(s) udf[(s)] &= ~1
  211. #define DEF_SLOT(s) udf[(s)] *= 3
  212. /* Scan through following bytecode and check for uses/defs. */
  213. lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
  214. "snapshot PC out of range");
  215. for (;;) {
  216. BCIns ins = *pc++;
  217. BCOp op = bc_op(ins);
  /* Operands are handled in the order B, C, A: operand A is looked at last. */
  218. switch (bcmode_b(op)) {
  219. case BCMvar: USE_SLOT(bc_b(ins)); break;
  220. default: break;
  221. }
  222. switch (bcmode_c(op)) {
  223. case BCMvar: USE_SLOT(bc_c(ins)); break;
  224. case BCMrbase:
  225. lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
  226. for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
  227. for (; s < maxslot; s++) DEF_SLOT(s);
  228. break;
  229. case BCMjump:
  /* A jump ends the scan: slots from minslot up are marked as defined. */
  /* Exception: a forward BC_UCLO jump is followed and the scan continues. */
  230. handle_jump: {
  231. BCReg minslot = bc_a(ins);
  232. if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
  233. else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
  234. else if (op == BC_UCLO) {
  235. ptrdiff_t delta = bc_j(ins);
  236. if (delta < 0) return maxslot; /* Prevent loop. */
  237. pc += delta;
  238. break;
  239. }
  240. for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
  241. return minslot < maxslot ? minslot : maxslot;
  242. }
  243. case BCMlit:
  244. if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
  245. goto handle_jump;
  246. } else if (bc_isret(op)) {
  /* Return: slots below A are defined, the returned range is used, */
  /* everything above is defined. The scan ends with result 0. */
  247. BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
  248. for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
  249. for (; s < top; s++) USE_SLOT(s);
  250. for (; s < maxslot; s++) DEF_SLOT(s);
  251. return 0;
  252. }
  253. break;
  254. case BCMfunc: return maxslot; /* NYI: will abort, anyway. */
  255. default: break;
  256. }
  257. switch (bcmode_a(op)) {
  258. case BCMvar: USE_SLOT(bc_a(ins)); break;
  259. case BCMdst:
  260. if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
  261. break;
  262. case BCMbase:
  263. if (op >= BC_CALLM && op <= BC_ITERN) {
  /* Calls: the argument range is used, everything above it is defined. */
  264. BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
  265. maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
  266. if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
  267. s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
  268. for (; s < top; s++) USE_SLOT(s);
  269. for (; s < maxslot; s++) DEF_SLOT(s);
  270. if (op == BC_CALLT || op == BC_CALLMT) {
  /* Tail calls end the scan; slots below A are marked as defined. */
  271. for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
  272. return 0;
  273. }
  274. } else if (op == BC_VARG) {
  275. return maxslot; /* NYI: punt. */
  276. } else if (op == BC_KNIL) {
  277. for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
  278. } else if (op == BC_TSETM) {
  279. for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
  280. }
  281. break;
  282. default: break;
  283. }
  284. lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
  285. "use/def analysis PC out of range");
  286. }
  287. #undef USE_SLOT
  288. #undef DEF_SLOT
  289. return 0; /* unreachable */
  290. }
  291. /* Mark slots used by upvalues of child prototypes as used. */
  292. static void snap_useuv(GCproto *pt, uint8_t *udf)
  293. {
  294. /* This is a coarse check, because it's difficult to correlate the lifetime
  295. ** of slots and closures. But the number of false positives is quite low.
  296. ** A false positive may cause a slot not to be purged, which is just
  297. ** a missed optimization.
  298. */
  299. if ((pt->flags & PROTO_CHILD)) {
  300. ptrdiff_t i, j, n = pt->sizekgc;
  301. GCRef *kr = mref(pt->k, GCRef) - 1;
  302. for (i = 0; i < n; i++, kr--) {
  303. GCobj *o = gcref(*kr);
  304. if (o->gch.gct == ~LJ_TPROTO) {
  305. for (j = 0; j < gco2pt(o)->sizeuv; j++) {
  306. uint32_t v = proto_uv(gco2pt(o))[j];
  307. if ((v & PROTO_UV_LOCAL)) {
  308. udf[(v & 0xff)] = 0;
  309. }
  310. }
  311. }
  312. }
  313. }
  314. }
  315. /* Purge dead slots before the next snapshot. */
  316. void lj_snap_purge(jit_State *J)
  317. {
  318. uint8_t udf[SNAP_USEDEF_SLOTS];
  319. BCReg s, maxslot = J->maxslot;
  320. if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams)
  321. maxslot = J->pt->numparams;
  322. s = snap_usedef(J, udf, J->pc, maxslot);
  323. if (s < maxslot) {
  324. snap_useuv(J->pt, udf);
  325. for (; s < maxslot; s++)
  326. if (udf[s] != 0)
  327. J->base[s] = 0; /* Purge dead slots. */
  328. }
  329. }
  330. /* Shrink last snapshot. */
  331. void lj_snap_shrink(jit_State *J)
  332. {
  333. SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  334. SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  335. MSize n, m, nlim, nent = snap->nent;
  336. uint8_t udf[SNAP_USEDEF_SLOTS];
  337. BCReg maxslot = J->maxslot;
  338. BCReg baseslot = J->baseslot;
  339. BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
  340. if (minslot < maxslot) snap_useuv(J->pt, udf);
  341. maxslot += baseslot;
  342. minslot += baseslot;
  343. snap->nslots = (uint8_t)maxslot;
  344. for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */
  345. BCReg s = snap_slot(map[n]);
  346. if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
  347. map[m++] = map[n]; /* Only copy used slots. */
  348. }
  349. snap->nent = (uint8_t)m;
  350. nlim = J->cur.nsnapmap - snap->mapofs - 1;
  351. while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
  352. J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */
  353. }
  354. /* -- Snapshot access ----------------------------------------------------- */
  355. /* Initialize a Bloom Filter with all renamed refs.
  356. ** There are very few renames (often none), so the filter has
  357. ** very few bits set. This makes it suitable for negative filtering.
  358. */
  359. static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
  360. {
  361. BloomFilter rfilt = 0;
  362. IRIns *ir;
  363. for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
  364. if (ir->op2 <= lim)
  365. bloomset(rfilt, ir->op1);
  366. return rfilt;
  367. }
  368. /* Process matching renames to find the original RegSP. */
  369. static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
  370. {
  371. IRIns *ir;
  372. for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
  373. if (ir->op1 == ref && ir->op2 <= lim)
  374. rs = ir->prev;
  375. return rs;
  376. }
  377. /* Copy RegSP from parent snapshot to the parent links of the IR. */
  378. IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
  379. {
  380. SnapShot *snap = &T->snap[snapno];
  381. SnapEntry *map = &T->snapmap[snap->mapofs];
  382. BloomFilter rfilt = snap_renamefilter(T, snapno);
  383. MSize n = 0;
  384. IRRef ref = 0;
  385. UNUSED(J);
  386. for ( ; ; ir++) {
  387. uint32_t rs;
  388. if (ir->o == IR_SLOAD) {
  389. if (!(ir->op2 & IRSLOAD_PARENT)) break;
  390. for ( ; ; n++) {
  391. lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
  392. if (snap_slot(map[n]) == ir->op1) {
  393. ref = snap_ref(map[n++]);
  394. break;
  395. }
  396. }
  397. } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
  398. ref++;
  399. } else if (ir->o == IR_PVAL) {
  400. ref = ir->op1 + REF_BIAS;
  401. } else {
  402. break;
  403. }
  404. rs = T->ir[ref].prev;
  405. if (bloomtest(rfilt, ref))
  406. rs = snap_renameref(T, snapno, ref, rs);
  407. ir->prev = (uint16_t)rs;
  408. lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
  409. }
  410. return ir;
  411. }
  412. /* -- Snapshot replay ----------------------------------------------------- */
  413. /* Replay constant from parent trace. */
  414. static TRef snap_replay_const(jit_State *J, IRIns *ir)
  415. {
  416. /* Only have to deal with constants that can occur in stack slots. */
  417. switch ((IROp)ir->o) {
  418. case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  419. case IR_KINT: return lj_ir_kint(J, ir->i);
  420. case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  421. case IR_KNUM: case IR_KINT64:
  422. return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
  423. case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
  424. case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t));
  425. default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
  426. }
  427. }
  428. /* De-duplicate parent reference. */
  429. static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
  430. {
  431. MSize j;
  432. for (j = 0; j < nmax; j++)
  433. if (snap_ref(map[j]) == ref)
  434. return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
  435. return 0;
  436. }
  437. /* Emit parent reference with de-duplication. */
  438. static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
  439. BloomFilter seen, IRRef ref)
  440. {
  441. IRIns *ir = &T->ir[ref];
  442. TRef tr;
  443. if (irref_isk(ref))
  444. tr = snap_replay_const(J, ir);
  445. else if (!regsp_used(ir->prev))
  446. tr = 0;
  447. else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
  448. tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  449. return tr;
  450. }
  451. /* Check whether a sunk store corresponds to an allocation. Slow path. */
  452. static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
  453. {
  454. if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
  455. irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
  456. IRIns *irk = &T->ir[irs->op1];
  457. if (irk->o == IR_AREF || irk->o == IR_HREFK)
  458. irk = &T->ir[irk->op1];
  459. return (&T->ir[irk->op1] == ira);
  460. }
  461. return 0;
  462. }
  463. /* Check whether a sunk store corresponds to an allocation. Fast path. */
  464. static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
  465. {
  466. if (irs->s != 255)
  467. return (ira + irs->s == irs); /* Fast check. */
  468. return snap_sunk_store2(T, ira, irs);
  469. }
  470. /* Replay snapshot state to setup side trace. */
  /*
  ** Rebuilds the recorder state from snapshot J->exitno of parent trace T.
  **
  ** Pass 1 fills J->slot[] for every snapshot entry: constants are replayed,
  ** values that have a register or spill slot in the parent become SLOADs
  ** flagged IRSLOAD_INHERIT|IRSLOAD_PARENT, and entries without either are
  ** deferred by storing the slot number itself as a placeholder.
  ** Pass 2 (only if something was deferred) emits the PVALs that the
  ** deferred entries depend on. Pass 3 (only if a RID_SUNK entry was found)
  ** re-emits the sunk allocations together with their sunk stores.
  **
  ** Afterwards J->base and J->maxslot are set from the snapshot and the
  ** initial snapshot of the side trace is taken. If pass 3 ran, a guarded
  ** GCSTEP is emitted after that snapshot.
  */
  471. void lj_snap_replay(jit_State *J, GCtrace *T)
  472. {
  473. SnapShot *snap = &T->snap[J->exitno];
  474. SnapEntry *map = &T->snapmap[snap->mapofs];
  475. MSize n, nent = snap->nent;
  476. BloomFilter seen = 0;
  477. int pass23 = 0;
  478. J->framedepth = 0;
  479. /* Emit IR for slots inherited from parent snapshot. */
  480. for (n = 0; n < nent; n++) {
  481. SnapEntry sn = map[n];
  482. BCReg s = snap_slot(sn);
  483. IRRef ref = snap_ref(sn);
  484. IRIns *ir = &T->ir[ref];
  485. TRef tr;
  486. /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
  487. if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
  488. goto setslot;
  489. bloomset(seen, ref);
  490. if (irref_isk(ref)) {
  491. /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
  492. if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
  493. tr = 0;
  494. else
  495. tr = snap_replay_const(J, ir);
  496. } else if (!regsp_used(ir->prev)) {
  /* No register or spill slot in the parent: defer to pass 2/3. */
  /* The slot number itself serves as the placeholder value. */
  497. pass23 = 1;
  498. lj_assertJ(s != 0, "unused slot 0 in snapshot");
  499. tr = s;
  500. } else {
  501. IRType t = irt_type(ir->t);
  502. uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
  503. if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
  504. if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
  505. if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
  506. tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
  507. }
  508. setslot:
  509. /* Same as TREF_* flags. */
  510. J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
  511. J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
  512. if ((sn & SNAP_FRAME))
  513. J->baseslot = s+1;
  514. }
  515. if (pass23) {
  516. IRIns *irlast = &T->ir[snap->ref];
  /* Reset the flag: it is set again only if a sunk allocation needs replay. */
  517. pass23 = 0;
  518. /* Emit dependent PVALs. */
  519. for (n = 0; n < nent; n++) {
  520. SnapEntry sn = map[n];
  521. IRRef refp = snap_ref(sn);
  522. IRIns *ir = &T->ir[refp];
  523. if (regsp_reg(ir->r) == RID_SUNK) {
  524. uint8_t m;
  /* Skip slots that no longer hold their pass 1 placeholder. */
  525. if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
  526. pass23 = 1;
  527. lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
  528. ir->o == IR_CNEW || ir->o == IR_CNEWI,
  529. "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
  530. m = lj_ir_mode[ir->o];
  531. if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1);
  532. if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2);
  533. if (LJ_HASFFI && ir->o == IR_CNEWI) {
  534. if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
  535. snap_pref(J, T, map, nent, seen, (ir+1)->op2);
  536. } else {
  537. IRIns *irs;
  /* Emit PVALs for the values of all sunk stores of this allocation. */
  538. for (irs = ir+1; irs < irlast; irs++)
  539. if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
  540. if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
  541. snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
  542. else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
  543. irs+1 < irlast && (irs+1)->o == IR_HIOP)
  544. snap_pref(J, T, map, nent, seen, (irs+1)->op2);
  545. }
  546. }
  547. } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
  /* Deferred entry that is not a sunk allocation: use the PVAL of its operand. */
  548. lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
  549. "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
  550. J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
  551. }
  552. }
  553. /* Replay sunk instructions. */
  554. for (n = 0; pass23 && n < nent; n++) {
  555. SnapEntry sn = map[n];
  556. IRRef refp = snap_ref(sn);
  557. IRIns *ir = &T->ir[refp];
  558. if (regsp_reg(ir->r) == RID_SUNK) {
  559. TRef op1, op2;
  560. uint8_t m;
  /* A slot holding another slot's number is resolved through that slot. */
  561. if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
  562. J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
  563. continue;
  564. }
  565. op1 = ir->op1;
  566. m = lj_ir_mode[ir->o];
  567. if (irm_op1(m) == IRMref) op1 = snap_pref(J, T, map, nent, seen, op1);
  568. op2 = ir->op2;
  569. if (irm_op2(m) == IRMref) op2 = snap_pref(J, T, map, nent, seen, op2);
  570. if (LJ_HASFFI && ir->o == IR_CNEWI) {
  571. if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
  572. lj_needsplit(J); /* Emit joining HIOP. */
  573. op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
  574. snap_pref(J, T, map, nent, seen, (ir+1)->op2));
  575. }
  576. J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
  577. } else {
  578. IRIns *irs;
  579. TRef tr = emitir(ir->ot, op1, op2);
  580. J->slot[snap_slot(sn)] = tr;
  /* Re-emit every sunk store that belongs to this allocation. */
  581. for (irs = ir+1; irs < irlast; irs++)
  582. if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
  583. IRIns *irr = &T->ir[irs->op1];
  584. TRef val, key = irr->op2, tmp = tr;
  585. if (irr->o != IR_FREF) {
  586. IRIns *irk = &T->ir[key];
  587. if (irr->o == IR_HREFK)
  588. key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
  589. irk->op2);
  590. else
  591. key = snap_replay_const(J, irk);
  592. if (irr->o == IR_HREFK || irr->o == IR_AREF) {
  593. IRIns *irf = &T->ir[irr->op1];
  594. tmp = emitir(irf->ot, tmp, irf->op2);
  595. } else if (irr->o == IR_NEWREF) {
  /* Reuse the latest NEWREF if it was emitted after this allocation */
  /* and has the same key, instead of emitting another one. */
  596. IRRef allocref = tref_ref(tr);
  597. IRRef keyref = tref_ref(key);
  598. IRRef newref_ref = J->chain[IR_NEWREF];
  599. IRIns *newref = &J->cur.ir[newref_ref];
  600. lj_assertJ(irref_isk(keyref),
  601. "sunk store for parent IR %04d with bad key %04d",
  602. refp - REF_BIAS, keyref - REF_BIAS);
  603. if (newref_ref > allocref && newref->op2 == keyref) {
  604. lj_assertJ(newref->op1 == allocref,
  605. "sunk store for parent IR %04d with bad tab %04d",
  606. refp - REF_BIAS, allocref - REF_BIAS);
  607. tmp = newref_ref;
  608. goto skip_newref;
  609. }
  610. }
  611. }
  612. tmp = emitir(irr->ot, tmp, key);
  613. skip_newref:
  614. val = snap_pref(J, T, map, nent, seen, irs->op2);
  615. if (val == 0) {
  /* The stored value has no location in the parent: redo the conversion. */
  616. IRIns *irc = &T->ir[irs->op2];
  617. lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
  618. "sunk store for parent IR %04d with bad op %d",
  619. refp - REF_BIAS, irc->o);
  620. val = snap_pref(J, T, map, nent, seen, irc->op1);
  621. val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
  622. } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
  623. irs+1 < irlast && (irs+1)->o == IR_HIOP) {
  /* Split 64 bit store: join the two halves into one value again. */
  624. IRType t = IRT_I64;
  625. if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
  626. t = IRT_NUM;
  627. lj_needsplit(J);
  628. if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
  629. uint64_t k = (uint32_t)T->ir[irs->op2].i +
  630. ((uint64_t)T->ir[(irs+1)->op2].i << 32);
  631. val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
  632. } else {
  633. val = emitir_raw(IRT(IR_HIOP, t), val,
  634. snap_pref(J, T, map, nent, seen, (irs+1)->op2));
  635. }
  636. tmp = emitir(IRT(irs->o, t), tmp, val);
  637. continue;
  638. }
  639. tmp = emitir(irs->ot, tmp, val);
  640. } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
  641. emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
  642. }
  643. }
  644. }
  645. }
  646. }
  647. J->base = J->slot + J->baseslot;
  648. J->maxslot = snap->nslots - J->baseslot;
  649. lj_snap_add(J);
  650. if (pass23) /* Need explicit GC step _after_ initial snapshot. */
  651. emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
  652. }
  653. /* -- Snapshot restore ---------------------------------------------------- */
  654. static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
  655. SnapNo snapno, BloomFilter rfilt,
  656. IRIns *ir, TValue *o);
  657. /* Restore a value from the trace exit state. */
  /*
  ** Writes the value of IR instruction 'ref' of trace T, as captured in the
  ** exit state 'ex', to the TValue 'o'.
  ** Constants are taken from the IR itself. For everything else the
  ** location is the RegSP stored in ir->prev, replaced by the result of
  ** snap_renameref() if 'rfilt' reports a possible rename. Spilled values
  ** are read from ex->spill[], register values from ex->gpr[] or ex->fpr[].
  ** An instruction with neither a spill slot nor a register is asserted to
  ** be a CONV with IRCONV_NUM_INT and is restored from its operand.
  */
  658. static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
  659. SnapNo snapno, BloomFilter rfilt,
  660. IRRef ref, TValue *o)
  661. {
  662. IRIns *ir = &T->ir[ref];
  663. IRType1 t = ir->t;
  664. RegSP rs = ir->prev;
  665. if (irref_isk(ref)) { /* Restore constant slot. */
  666. if (ir->o == IR_KPTR) {
  667. o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
  668. } else {
  669. lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
  670. "restore of const from IR %04d with bad op %d",
  671. ref - REF_BIAS, ir->o);
  672. lj_ir_kvalue(J->L, o, ir);
  673. }
  674. return;
  675. }
  /* The filter is a cheap negative test; the rename list is only walked on a hit. */
  676. if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
  677. rs = snap_renameref(T, snapno, ref, rs);
  678. if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
  679. int32_t *sps = &ex->spill[regsp_spill(rs)];
  680. if (irt_isinteger(t)) {
  681. setintV(o, *sps);
  682. #if !LJ_SOFTFP32
  683. } else if (irt_isnum(t)) {
  684. o->u64 = *(uint64_t *)sps;
  685. #endif
  686. #if LJ_64 && !LJ_GC64
  687. } else if (irt_islightud(t)) {
  688. /* 64 bit lightuserdata which may escape already has the tag bits. */
  689. o->u64 = *(uint64_t *)sps;
  690. #endif
  691. } else {
  692. lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
  693. setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
  694. }
  695. } else { /* Restore from register. */
  696. Reg r = regsp_reg(rs);
  697. if (ra_noreg(r)) {
  /* No location at all: restore the operand, then redo the conversion. */
  698. lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
  699. "restore from IR %04d has no reg", ref - REF_BIAS);
  700. snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
  701. if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
  702. return;
  703. } else if (irt_isinteger(t)) {
  704. setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
  705. #if !LJ_SOFTFP
  706. } else if (irt_isnum(t)) {
  707. setnumV(o, ex->fpr[r-RID_MIN_FPR]);
  708. #elif LJ_64 /* && LJ_SOFTFP */
  709. } else if (irt_isnum(t)) {
  710. o->u64 = ex->gpr[r-RID_MIN_GPR];
  711. #endif
  712. #if LJ_64 && !LJ_GC64
  713. } else if (irt_is64(t)) {
  714. /* 64 bit values that already have the tag bits. */
  715. o->u64 = ex->gpr[r-RID_MIN_GPR];
  716. #endif
  717. } else if (irt_ispri(t)) {
  /* Primitive types are fully described by their type; no register is read. */
  718. setpriV(o, irt_toitype(t));
  719. } else {
  720. setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
  721. }
  722. }
  723. }
  724. #if LJ_HASFFI
  725. /* Restore raw data from the trace exit state. */
  /*
  ** Copies 'sz' bytes (asserted to be 1, 2, 4 or 8) of the raw value of IR
  ** instruction 'ref' of trace T to 'dst'.
  ** The source is the constant in the IR, a spill slot in ex->spill[], or a
  ** register image in ex->gpr[] / ex->fpr[]. A 32 bit constant or a non-64
  ** bit spilled value is zero-extended through a temporary when 8 bytes are
  ** requested. An instruction with neither a spill slot nor a register is
  ** asserted to be a CONV with IRCONV_NUM_INT: its operand is restored as a
  ** 4 byte integer and converted to a lua_Number in place.
  */
  726. static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
  727. SnapNo snapno, BloomFilter rfilt,
  728. IRRef ref, void *dst, CTSize sz)
  729. {
  730. IRIns *ir = &T->ir[ref];
  731. RegSP rs = ir->prev;
  732. int32_t *src;
  733. uint64_t tmp;
  734. UNUSED(J);
  735. if (irref_isk(ref)) {
  736. if (ir_isk64(ir)) {
  /* The 64 bit constant is read from the IR slot following the instruction. */
  737. src = (int32_t *)&ir[1];
  738. } else if (sz == 8) {
  739. tmp = (uint64_t)(uint32_t)ir->i;
  740. src = (int32_t *)&tmp;
  741. } else {
  742. src = &ir->i;
  743. }
  744. } else {
  745. if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
  746. rs = snap_renameref(T, snapno, ref, rs);
  747. if (ra_hasspill(regsp_spill(rs))) {
  748. src = &ex->spill[regsp_spill(rs)];
  749. if (sz == 8 && !irt_is64(ir->t)) {
  750. tmp = (uint64_t)(uint32_t)*src;
  751. src = (int32_t *)&tmp;
  752. }
  753. } else {
  754. Reg r = regsp_reg(rs);
  755. if (ra_noreg(r)) {
  756. /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
  757. lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
  758. "restore from IR %04d has no reg", ref - REF_BIAS);
  759. snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
  760. *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
  761. return;
  762. }
  763. src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
  764. #if !LJ_SOFTFP
  /* Register numbers at or above RID_MAX_GPR are read from ex->fpr[] instead. */
  765. if (r >= RID_MAX_GPR) {
  766. src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
  767. #if LJ_TARGET_PPC
  768. if (sz == 4) { /* PPC FPRs are always doubles. */
  769. *(float *)dst = (float)*(double *)src;
  770. return;
  771. }
  772. #else
  /* Big-endian: a 4 byte value is read from the second 32 bit word. */
  773. if (LJ_BE && sz == 4) src++;
  774. #endif
  775. } else
  776. #endif
  777. if (LJ_64 && LJ_BE && sz == 4) src++;
  778. }
  779. }
  780. lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
  781. "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
  /* 1 and 2 byte values are read as a 32 bit word and truncated. */
  782. if (sz == 4) *(int32_t *)dst = *src;
  783. else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  784. else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  785. else *(int16_t *)dst = (int16_t)*src;
  786. }
  787. #endif
  788. /* Unsink allocation from the trace exit state. Unsink sunk stores. */
  789. static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
  790. SnapNo snapno, BloomFilter rfilt,
  791. IRIns *ir, TValue *o)
  792. {
  793. lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
  794. ir->o == IR_CNEW || ir->o == IR_CNEWI,
  795. "sunk allocation with bad op %d", ir->o);
  796. #if LJ_HASFFI
  797. if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
  798. CTState *cts = ctype_cts(J->L);
  799. CTypeID id = (CTypeID)T->ir[ir->op1].i;
  800. CTSize sz;
  801. CTInfo info = lj_ctype_info(cts, id, &sz);
  802. GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
  803. setcdataV(J->L, o, cd);
  804. if (ir->o == IR_CNEWI) {
  805. uint8_t *p = (uint8_t *)cdataptr(cd);
  806. lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
  807. if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
  808. snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
  809. LJ_LE ? p+4 : p, 4);
  810. if (LJ_BE) p += 4;
  811. sz = 4;
  812. }
  813. snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
  814. } else {
  815. IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
  816. for (irs = ir+1; irs < irlast; irs++)
  817. if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
  818. IRIns *iro = &T->ir[T->ir[irs->op1].op2];
  819. uint8_t *p = (uint8_t *)cd;
  820. CTSize szs;
  821. lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
  822. lj_assertJ(T->ir[irs->op1].o == IR_ADD,
  823. "sunk store with bad add op %d", T->ir[irs->op1].o);
  824. lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
  825. "sunk store with bad const offset op %d", iro->o);
  826. if (irt_is64(irs->t)) szs = 8;
  827. else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
  828. else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
  829. else szs = 4;
  830. if (LJ_64 && iro->o == IR_KINT64)
  831. p += (int64_t)ir_k64(iro)->u64;
  832. else
  833. p += iro->i;
  834. lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
  835. p + szs <= (uint8_t *)cdataptr(cd) + sz,
  836. "sunk store with offset out of range");
  837. if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
  838. lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
  839. snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
  840. LJ_LE ? p+4 : p, 4);
  841. if (LJ_BE) p += 4;
  842. }
  843. snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
  844. }
  845. }
  846. } else
  847. #endif
  848. {
  849. IRIns *irs, *irlast;
  850. GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
  851. lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
  852. settabV(J->L, o, t);
  853. irlast = &T->ir[T->snap[snapno].ref];
  854. for (irs = ir+1; irs < irlast; irs++)
  855. if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
  856. IRIns *irk = &T->ir[irs->op1];
  857. TValue tmp, *val;
  858. lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
  859. irs->o == IR_FSTORE,
  860. "sunk store with bad op %d", irs->o);
  861. if (irk->o == IR_FREF) {
  862. switch (irk->op2) {
  863. case IRFL_TAB_META:
  864. if (T->ir[irs->op2].o == IR_KNULL) {
  865. setgcrefnull(t->metatable);
  866. } else {
  867. snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
  868. /* NOBARRIER: The table is new (marked white). */
  869. setgcref(t->metatable, obj2gco(tabV(&tmp)));
  870. }
  871. break;
  872. case IRFL_TAB_NOMM:
  873. /* Negative metamethod cache invalidated by lj_tab_set() below. */
  874. break;
  875. default:
  876. lj_assertJ(0, "sunk store with bad field %d", irk->op2);
  877. break;
  878. }
  879. } else {
  880. irk = &T->ir[irk->op2];
  881. if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
  882. lj_ir_kvalue(J->L, &tmp, irk);
  883. val = lj_tab_set(J->L, t, &tmp);
  884. /* NOBARRIER: The table is new (marked white). */
  885. snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
  886. if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
  887. snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
  888. val->u32.hi = tmp.u32.lo;
  889. }
  890. }
  891. }
  892. }
  893. }
  894. /* Restore interpreter state from exit state with the help of a snapshot. */
  895. const BCIns *lj_snap_restore(jit_State *J, void *exptr)
  896. {
  897. ExitState *ex = (ExitState *)exptr;
  898. SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
  899. GCtrace *T = traceref(J, J->parent);
  900. SnapShot *snap = &T->snap[snapno];
  901. MSize n, nent = snap->nent;
  902. SnapEntry *map = &T->snapmap[snap->mapofs];
  903. #if !LJ_FR2 || defined(LUA_USE_ASSERT)
  904. SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
  905. #endif
  906. #if !LJ_FR2
  907. ptrdiff_t ftsz0;
  908. #endif
  909. TValue *frame;
  910. BloomFilter rfilt = snap_renamefilter(T, snapno);
  911. const BCIns *pc = snap_pc(&map[nent]);
  912. lua_State *L = J->L;
  913. /* Set interpreter PC to the next PC to get correct error messages. */
  914. setcframe_pc(cframe_raw(L->cframe), pc+1);
  915. /* Make sure the stack is big enough for the slots from the snapshot. */
  916. if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
  917. L->top = curr_topL(L);
  918. lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  919. }
  920. /* Fill stack slots with data from the registers and spill slots. */
  921. frame = L->base-1-LJ_FR2;
  922. #if !LJ_FR2
  923. ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
  924. #endif
  925. for (n = 0; n < nent; n++) {
  926. SnapEntry sn = map[n];
  927. if (!(sn & SNAP_NORESTORE)) {
  928. TValue *o = &frame[snap_slot(sn)];
  929. IRRef ref = snap_ref(sn);
  930. IRIns *ir = &T->ir[ref];
  931. if (ir->r == RID_SUNK) {
  932. MSize j;
  933. for (j = 0; j < n; j++)
  934. if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
  935. copyTV(L, o, &frame[snap_slot(map[j])]);
  936. goto dupslot;
  937. }
  938. snap_unsink(J, T, ex, snapno, rfilt, ir, o);
  939. dupslot:
  940. continue;
  941. }
  942. snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
  943. if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
  944. TValue tmp;
  945. snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
  946. o->u32.hi = tmp.u32.lo;
  947. #if !LJ_FR2
  948. } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
  949. /* Overwrite tag with frame link. */
  950. setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
  951. L->base = o+1;
  952. #endif
  953. } else if ((sn & SNAP_KEYINDEX)) {
  954. /* A IRT_INT key index slot is restored as a number. Undo this. */
  955. o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
  956. o->u32.hi = LJ_KEYINDEX;
  957. }
  958. }
  959. }
  960. #if LJ_FR2
  961. L->base += (map[nent+LJ_BE] & 0xff);
  962. #endif
  963. lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");
  964. /* Compute current stack top. */
  965. switch (bc_op(*pc)) {
  966. default:
  967. if (bc_op(*pc) < BC_FUNCF) {
  968. L->top = curr_topL(L);
  969. break;
  970. }
  971. /* fallthrough */
  972. case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
  973. L->top = frame + snap->nslots;
  974. break;
  975. }
  976. return pc;
  977. }
  978. #undef emitir_raw
  979. #undef emitir
  980. #endif