  1. /*
  2. ** Memory access optimizations.
  3. ** AA: Alias Analysis using high-level semantic disambiguation.
  4. ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
  5. ** DSE: Dead-Store Elimination.
  6. ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
  7. */
  8. #define lj_opt_mem_c
  9. #define LUA_CORE
  10. #include "lj_obj.h"
  11. #if LJ_HASJIT
  12. #include "lj_tab.h"
  13. #include "lj_ir.h"
  14. #include "lj_jit.h"
  15. #include "lj_iropt.h"
  16. #include "lj_ircall.h"
  17. #include "lj_dispatch.h"
  18. /* Some local macros to save typing. Undef'd at the end. */
  19. #define IR(ref) (&J->cur.ir[(ref)])
  20. #define fins (&J->fold.ins)
  21. #define fleft (J->fold.left)
  22. #define fright (J->fold.right)
  23. /*
  24. ** Caveat #1: return value is not always a TRef -- only use with tref_ref().
  25. ** Caveat #2: FWD relies on active CSE for xREF operands -- see lj_opt_fold().
  26. */
/* Return values from alias analysis.
** Ordered from "definitely disjoint" to "definitely identical"; only
** ALIAS_NO and ALIAS_MUST are exact answers, ALIAS_MAY is conservative.
*/
typedef enum {
  ALIAS_NO,	/* The two refs CANNOT alias (exact). */
  ALIAS_MAY,	/* The two refs MAY alias (inexact). */
  ALIAS_MUST	/* The two refs MUST alias (exact). */
} AliasRet;
  33. /* -- ALOAD/HLOAD forwarding and ASTORE/HSTORE elimination ---------------- */
  34. /* Simplified escape analysis: check for intervening stores. */
  35. static AliasRet aa_escape(jit_State *J, IRIns *ir, IRIns *stop)
  36. {
  37. IRRef ref = (IRRef)(ir - J->cur.ir); /* The ref that might be stored. */
  38. for (ir++; ir < stop; ir++)
  39. if (ir->op2 == ref &&
  40. (ir->o == IR_ASTORE || ir->o == IR_HSTORE ||
  41. ir->o == IR_USTORE || ir->o == IR_FSTORE))
  42. return ALIAS_MAY; /* Reference was stored and might alias. */
  43. return ALIAS_NO; /* Reference was not stored. */
  44. }
  45. /* Alias analysis for two different table references. */
  46. static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
  47. {
  48. IRIns *taba = IR(ta), *tabb = IR(tb);
  49. int newa, newb;
  50. lj_assertJ(ta != tb, "bad usage");
  51. lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage");
  52. /* Disambiguate new allocations. */
  53. newa = (taba->o == IR_TNEW || taba->o == IR_TDUP);
  54. newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP);
  55. if (newa && newb)
  56. return ALIAS_NO; /* Two different allocations never alias. */
  57. if (newb) { /* At least one allocation? */
  58. IRIns *tmp = taba; taba = tabb; tabb = tmp;
  59. } else if (!newa) {
  60. return ALIAS_MAY; /* Anything else: we just don't know. */
  61. }
  62. return aa_escape(J, taba, tabb);
  63. }
  64. /* Check whether there's no aliasing table.clear. */
  65. static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
  66. {
  67. IRRef ref = J->chain[IR_CALLS];
  68. while (ref > lim) {
  69. IRIns *calls = IR(ref);
  70. if (calls->op2 == IRCALL_lj_tab_clear &&
  71. (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
  72. return 0; /* Conflict. */
  73. ref = calls->prev;
  74. }
  75. return 1; /* No conflict. Can safely FOLD/CSE. */
  76. }
  77. /* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
  78. int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
  79. {
  80. IRRef ta = fins->op1;
  81. IRRef ref = J->chain[IR_NEWREF];
  82. while (ref > lim) {
  83. IRIns *newref = IR(ref);
  84. if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
  85. return 0; /* Conflict. */
  86. ref = newref->prev;
  87. }
  88. return fwd_aa_tab_clear(J, lim, ta);
  89. }
/* Alias analysis for array and hash access using key-based disambiguation.
** refa/refb are xREF instructions (AREF, HREF, HREFK or NEWREF).
** Disambiguates first by key, then (for AREF) by index arithmetic or
** (for hash refs) by key type, and finally by the underlying tables.
*/
static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
{
  IRRef ka = refa->op2;  /* Key operand of the first access. */
  IRRef kb = refb->op2;  /* Key operand of the second access. */
  IRIns *keya, *keyb;
  IRRef ta, tb;
  if (refa == refb)
    return ALIAS_MUST;  /* Shortcut for same refs. */
  /* Strip KSLOT wrappers to get at the underlying key constant. */
  keya = IR(ka);
  if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); }
  keyb = IR(kb);
  if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); }
  /* HREFK/AREF reference an FLOAD of the table part; hop to the table. */
  ta = (refa->o==IR_HREFK || refa->o==IR_AREF) ? IR(refa->op1)->op1 : refa->op1;
  tb = (refb->o==IR_HREFK || refb->o==IR_AREF) ? IR(refb->op1)->op1 : refb->op1;
  if (ka == kb) {
    /* Same key. Check for same table with different ref (NEWREF vs. HREF). */
    if (ta == tb)
      return ALIAS_MUST;  /* Same key, same table. */
    else
      return aa_table(J, ta, tb);  /* Same key, possibly different table. */
  }
  if (irref_isk(ka) && irref_isk(kb))
    return ALIAS_NO;  /* Different constant keys. */
  if (refa->o == IR_AREF) {
    /* Disambiguate array references based on index arithmetic. */
    int32_t ofsa = 0, ofsb = 0;
    IRRef basea = ka, baseb = kb;
    lj_assertJ(refb->o == IR_AREF, "expected AREF");
    /* Gather base and offset from t[base] or t[base+-ofs]. */
    if (keya->o == IR_ADD && irref_isk(keya->op2)) {
      basea = keya->op1;
      ofsa = IR(keya->op2)->i;
      if (basea == kb && ofsa != 0)
	return ALIAS_NO;  /* t[base+-ofs] vs. t[base]. */
    }
    if (keyb->o == IR_ADD && irref_isk(keyb->op2)) {
      baseb = keyb->op1;
      ofsb = IR(keyb->op2)->i;
      if (ka == baseb && ofsb != 0)
	return ALIAS_NO;  /* t[base] vs. t[base+-ofs]. */
    }
    /* Same symbolic base with different constant offsets cannot alias. */
    if (basea == baseb && ofsa != ofsb)
      return ALIAS_NO;  /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
  } else {
    /* Disambiguate hash references based on the type of their keys. */
    lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
	       (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF),
	       "bad xREF IR op %d or %d", refa->o, refb->o);
    if (!irt_sametype(keya->t, keyb->t))
      return ALIAS_NO;  /* Different key types. */
  }
  if (ta == tb)
    return ALIAS_MAY;  /* Same table, cannot disambiguate keys. */
  else
    return aa_table(J, ta, tb);  /* Try to disambiguate tables. */
}
/* Array and hash load forwarding.
** Tries, in order: store-to-load forwarding from an aliasing store,
** const-folding loads from fresh TNEW/TDUP allocations, and plain
** load-to-load forwarding (CSE). Returns the forwarded TRef or 0.
*/
static TRef fwd_ahload(jit_State *J, IRRef xref)
{
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;
  /* Search for conflicting stores. */
  ref = J->chain[fins->o+IRDELTA_L2S];  /* Store chain matching this load op. */
  while (ref > xref) {
    IRIns *store = IR(ref);
    switch (aa_ahref(J, xr, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }
  /* No conflicting store (yet): const-fold loads from allocations. */
  {
    /* Hop from HREFK/AREF over the intermediate FLOAD to the table ref. */
    IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
    IRRef tab = ir->op1;
    ir = IR(tab);
    /* TDUP additionally needs a constant key; rule out table.clear, too. */
    if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) &&
	fwd_aa_tab_clear(J, tab, tab)) {
      /* A NEWREF with a number key may end up pointing to the array part.
      ** But it's referenced from HSTORE and not found in the ASTORE chain.
      ** Or a NEWREF may rehash the table and move unrelated number keys.
      ** For now simply consider this a conflict without forwarding anything.
      */
      if (xr->o == IR_AREF) {
	IRRef ref2 = J->chain[IR_NEWREF];
	while (ref2 > tab) {
	  IRIns *newref = IR(ref2);
	  if (irt_isnum(IR(newref->op2)->t))
	    goto cselim;  /* NEWREF with a number key conflicts with AREF. */
	  ref2 = newref->prev;
	}
      } else {
	IRIns *key = IR(xr->op2);
	if (key->o == IR_KSLOT) key = IR(key->op1);
	if (irt_isnum(key->t) && J->chain[IR_NEWREF] > tab)
	  goto cselim;  /* Any later NEWREF may have rehashed number keys. */
      }
      /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
      ** But the above search for conflicting stores was limited by xref.
      ** So continue searching, limited by the TNEW/TDUP. Store forwarding
      ** is ok, too. A conflict does NOT limit the search for a matching load.
      */
      while (ref > tab) {
	IRIns *store = IR(ref);
	switch (aa_ahref(J, xr, IR(store->op1))) {
	case ALIAS_NO:   break;  /* Continue searching. */
	case ALIAS_MAY:  goto cselim;  /* Conflicting store. */
	case ALIAS_MUST: return store->op2;  /* Store forwarding. */
	}
	ref = store->prev;
      }
      /* Simplified here: let loop_unroll() figure out any type instability. */
      if (ir->o == IR_TNEW) {
	return TREF_NIL;  /* Fresh table: every unstored slot is nil. */
      } else {
	/* TDUP with constant key: look the value up in the template table. */
	TValue keyv;
	cTValue *tv;
	IRIns *key = IR(xr->op2);
	if (key->o == IR_KSLOT) key = IR(key->op1);
	lj_ir_kvalue(J->L, &keyv, key);
	tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
	if (tvispri(tv))
	  return TREF_PRI(itype2irt(tv));
	else if (tvisnum(tv))
	  return lj_ir_knum_u64(J, tv->u64);
	else if (tvisint(tv))
	  return lj_ir_kint(J, intV(tv));
	else if (tvisgcv(tv))
	  return lj_ir_kstr(J, strV(tv));
      }
      /* Otherwise: don't intern as a constant. */
    }
  }
cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[fins->o];
  while (ref > lim) {
    IRIns *load = IR(ref);
    if (load->op1 == xref)
      return ref;  /* Load forwarding. */
    ref = load->prev;
  }
  return 0;  /* Conflict or no match. */
}
/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case.
** Matches an AREF key of the shape (idx + k) + (-k), whose offsets cancel,
** and retries forwarding with an existing AREF t[idx], if one is found.
** Returns the forwarded TRef or 0.
*/
static TRef fwd_aload_reassoc(jit_State *J)
{
  IRIns *irx = IR(fins->op1);  /* The AREF of this ALOAD. */
  IRIns *key = IR(irx->op2);
  if (key->o == IR_ADD && irref_isk(key->op2)) {
    IRIns *add2 = IR(key->op1);
    /* Inner ADD with a constant that exactly cancels the outer constant. */
    if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	IR(key->op2)->i == -IR(add2->op2)->i) {
      IRRef ref = J->chain[IR_AREF];
      IRRef lim = add2->op1;
      if (irx->op1 > lim) lim = irx->op1;
      /* Look for an existing AREF with the same table and the inner index. */
      while (ref > lim) {
	IRIns *ir = IR(ref);
	if (ir->op1 == irx->op1 && ir->op2 == add2->op1)
	  return fwd_ahload(J, ref);  /* Retry forwarding with that AREF. */
	ref = ir->prev;
      }
    }
  }
  return 0;  /* No reassociation opportunity. */
}
  259. /* ALOAD forwarding. */
  260. TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J)
  261. {
  262. IRRef ref;
  263. if ((ref = fwd_ahload(J, fins->op1)) ||
  264. (ref = fwd_aload_reassoc(J)))
  265. return ref;
  266. return EMITFOLD;
  267. }
  268. /* HLOAD forwarding. */
  269. TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J)
  270. {
  271. IRRef ref = fwd_ahload(J, fins->op1);
  272. if (ref)
  273. return ref;
  274. return EMITFOLD;
  275. }
/* HREFK forwarding.
** Tries to forward the key slot from a NEWREF on the same table with the
** identical key. Absent any conflicting NEWREF/table.clear, an HREFK into
** a TDUP template can drop its guard, since key slots cannot have moved.
*/
TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J)
{
  IRRef tab = fleft->op1;  /* Table of the underlying reference. */
  IRRef ref = J->chain[IR_NEWREF];
  while (ref > tab) {
    IRIns *newref = IR(ref);
    if (tab == newref->op1) {
      /* NEWREF on the same table: forwardable only for the identical key. */
      if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab))
	return ref;  /* Forward from NEWREF. */
      else
	goto docse;
    } else if (aa_table(J, tab, newref->op1) != ALIAS_NO) {
      goto docse;  /* Possibly aliasing NEWREF: fall back to CSE. */
    }
    ref = newref->prev;
  }
  /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */
  if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab))
    fins->t.irt &= ~IRT_GUARD;  /* Drop HREFK guard. */
docse:
  return CSEFOLD;
}
/* Check whether HREF of TNEW/TDUP can be folded to niltv.
** Returns 1 if no intervening store could have created the key, so the
** lookup is known to miss and may fold to the global nil value.
*/
int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
{
  IRRef lim = fins->op1;  /* Search limit. */
  IRRef ref;
  /* The key for an ASTORE may end up in the hash part after a NEWREF. */
  if (irt_isnum(fright->t) && J->chain[IR_NEWREF] > lim) {
    ref = J->chain[IR_ASTORE];
    while (ref > lim) {
      /* Any ASTORE below the latest NEWREF may have been rehashed. */
      if (ref < J->chain[IR_NEWREF])
	return 0;  /* Conflict. */
      ref = IR(ref)->prev;
    }
  }
  /* Search for conflicting stores. */
  ref = J->chain[IR_HSTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    if (aa_ahref(J, fins, IR(store->op1)) != ALIAS_NO)
      return 0;  /* Conflict. */
    ref = store->prev;
  }
  return 1;  /* No conflict. Can fold to niltv. */
}
/* ASTORE/HSTORE elimination (DSE).
** Drops the new store when it stores the same value to the same location,
** or NOPs out an earlier store to the same location that is overwritten
** without any intervening guard or conflicting load.
*/
TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
{
  IRRef xref = fins->op1;  /* xREF reference. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRIns *xr = IR(xref);
  IRRef1 *refp = &J->chain[fins->o];  /* Chain link to patch on elimination. */
  IRRef ref = *refp;
  while (ref > xref) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_ahref(J, xr, IR(store->op1))) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:	/* Store to MAYBE the same location. */
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:	/* Store to the same location. */
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards (includes conflicting loads).
	** Note that lj_tab_keyindex and lj_vm_next don't need guards,
	** since they are followed by at least one guarded VLOAD.
	*/
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t) || ir->o == IR_ALEN)
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;
	lj_ir_nop(store);
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
/* ALEN forwarding.
** CSEs a previous ALEN on the same table, provided no intervening store
** could have changed the array length. Also detects the t[#t+1] = x push
** idiom and records the index as an ALEN hint.
*/
TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
{
  IRRef tab = fins->op1;  /* Table reference. */
  IRRef lim = tab;  /* Search limit. */
  IRRef ref;
  /* Search for conflicting HSTORE with numeric key. */
  ref = J->chain[IR_HSTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    IRIns *href = IR(store->op1);
    IRIns *key = IR(href->op2);
    if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
      lim = ref;  /* Conflicting store found, limits search for ALEN. */
      break;
    }
    ref = store->prev;
  }
  /* Try to find a matching ALEN. */
  ref = J->chain[IR_ALEN];
  while (ref > lim) {
    /* CSE for ALEN only depends on the table, not the hint. */
    if (IR(ref)->op1 == tab) {
      IRRef sref;
      /* Search for aliasing table.clear. */
      if (!fwd_aa_tab_clear(J, ref, tab))
	break;
      /* Search for hint-forwarding or conflicting store. */
      sref = J->chain[IR_ASTORE];
      while (sref > ref) {
	IRIns *store = IR(sref);
	IRIns *aref = IR(store->op1);
	IRIns *fref = IR(aref->op1);  /* FLOAD of the array part. */
	if (tab == fref->op1) {  /* ASTORE to the same table. */
	  /* Detect t[#t+1] = x idiom for push. */
	  IRIns *idx = IR(aref->op2);
	  if (!irt_isnil(store->t) &&
	      idx->o == IR_ADD && idx->op1 == ref &&
	      IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
	    /* Note: this requires an extra PHI check in loop unroll. */
	    fins->op2 = aref->op2;  /* Set ALEN hint. */
	  }
	  goto doemit;  /* Conflicting store, possibly giving a hint. */
	} else if (aa_table(J, tab, fref->op1) != ALIAS_NO) {
	  goto doemit;  /* Conflicting store. */
	}
	sref = store->prev;
      }
      return ref;  /* Plain ALEN forwarding. */
    }
    ref = IR(ref)->prev;
  }
doemit:
  return EMITFOLD;
}
  420. /* -- ULOAD forwarding ---------------------------------------------------- */
/* The current alias analysis for upvalues is very simplistic. It only
** disambiguates between the unique upvalues of the same function.
** This is good enough for now, since most upvalues are read-only.
**
** A more precise analysis would be feasible with the help of the parser:
** generate a unique key for every upvalue, even across all prototypes.
** Lacking a realistic use-case, it's unclear whether this is beneficial.
*/
static AliasRet aa_uref(IRIns *refa, IRIns *refb)
{
  if (refa->op1 == refb->op1) {  /* Same function. */
    if (refa->op2 == refb->op2)
      return ALIAS_MUST;  /* Same function, same upvalue idx. */
    else
      return ALIAS_NO;  /* Same function, different upvalue idx. */
  } else {  /* Different functions, check disambiguation hash values. */
    /* The low 8 bits of op2 carry a per-upvalue disambiguation hash. */
    if (((refa->op2 ^ refb->op2) & 0xff)) {
      return ALIAS_NO;  /* Upvalues with different hash values cannot alias. */
    } else if (refa->o != refb->o) {
      /* Different UREFx type, but need to confirm the UREFO really is open.
      ** NOTE(review): this widens the UREFO result type from IRT_IGC to
      ** IRT_PGC as a side effect on the referenced instruction -- presumably
      ** to force a guarded (open) upvalue check; confirm against lj_ir.h.
      */
      if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
      else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
      return ALIAS_NO;
    } else {
      /* No conclusion can be drawn for same hash value and same UREFx type. */
      return ALIAS_MAY;
    }
  }
}
/* ULOAD forwarding.
** Forwards a stored value from an aliasing USTORE, or CSEs an earlier
** ULOAD of an identical or structurally equal UREFx. Otherwise emits.
*/
TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
{
  IRRef uref = fins->op1;
  IRRef lim = REF_BASE;  /* Search limit. */
  IRIns *xr = IR(uref);
  IRRef ref;
  /* Search for conflicting stores. */
  ref = J->chain[IR_USTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    switch (aa_uref(xr, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }
cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[IR_ULOAD];
  while (ref > lim) {
    IRIns *ir = IR(ref);
    if (ir->op1 == uref ||
	(IR(ir->op1)->op12 == IR(uref)->op12 && IR(ir->op1)->o == IR(uref)->o))
      return ref;  /* Match for identical or equal UREFx (non-CSEable UREFO). */
    ref = ir->prev;
  }
  return lj_ir_emit(J);
}
/* USTORE elimination (DSE).
** Same strategy as lj_opt_dse_ahstore. Additionally removes a write
** barrier (OBAR) that immediately follows an eliminated store.
*/
TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J)
{
  IRRef xref = fins->op1;  /* xREF reference. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRIns *xr = IR(xref);
  IRRef1 *refp = &J->chain[IR_USTORE];  /* Chain link to patch on elimination. */
  IRRef ref = *refp;
  while (ref > xref) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_uref(xr, IR(store->op1))) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:	/* Store to MAYBE the same location. */
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:	/* Store to the same location. */
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards (includes conflicting loads). */
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t))
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;
	lj_ir_nop(store);
	/* If the eliminated store was followed by its write barrier,
	** unlink that OBAR from its chain and NOP it out, too.
	*/
	if (ref+1 < J->cur.nins &&
	    store[1].o == IR_OBAR && store[1].op1 == xref) {
	  IRRef1 *bp = &J->chain[IR_OBAR];
	  IRIns *obar;
	  for (obar = IR(*bp); *bp > ref+1; obar = IR(*bp))
	    bp = &obar->prev;
	  /* Remove OBAR, too. */
	  *bp = obar->prev;
	  lj_ir_nop(obar);
	}
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
  529. /* -- FLOAD forwarding and FSTORE elimination ----------------------------- */
  530. /* Alias analysis for field access.
  531. ** Field loads are cheap and field stores are rare.
  532. ** Simple disambiguation based on field types is good enough.
  533. */
  534. static AliasRet aa_fref(jit_State *J, IRIns *refa, IRIns *refb)
  535. {
  536. if (refa->op2 != refb->op2)
  537. return ALIAS_NO; /* Different fields. */
  538. if (refa->op1 == refb->op1)
  539. return ALIAS_MUST; /* Same field, same object. */
  540. else if (refa->op2 >= IRFL_TAB_META && refa->op2 <= IRFL_TAB_NOMM)
  541. return aa_table(J, refa->op1, refb->op1); /* Disambiguate tables. */
  542. else
  543. return ALIAS_MAY; /* Same field, possibly different object. */
  544. }
  545. /* Only the loads for mutable fields end up here (see FOLD). */
  546. TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
  547. {
  548. IRRef oref = fins->op1; /* Object reference. */
  549. IRRef fid = fins->op2; /* Field ID. */
  550. IRRef lim = oref; /* Search limit. */
  551. IRRef ref;
  552. /* Search for conflicting stores. */
  553. ref = J->chain[IR_FSTORE];
  554. while (ref > oref) {
  555. IRIns *store = IR(ref);
  556. switch (aa_fref(J, fins, IR(store->op1))) {
  557. case ALIAS_NO: break; /* Continue searching. */
  558. case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */
  559. case ALIAS_MUST: return store->op2; /* Store forwarding. */
  560. }
  561. ref = store->prev;
  562. }
  563. /* No conflicting store: const-fold field loads from allocations. */
  564. if (fid == IRFL_TAB_META) {
  565. IRIns *ir = IR(oref);
  566. if (ir->o == IR_TNEW || ir->o == IR_TDUP)
  567. return lj_ir_knull(J, IRT_TAB);
  568. }
  569. cselim:
  570. /* Try to find a matching load. Below the conflicting store, if any. */
  571. return lj_opt_cselim(J, lim);
  572. }
/* FSTORE elimination (DSE).
** Same strategy as lj_opt_dse_ahstore, with two field-specific twists:
** SBUF fields are never dropped, and intervening FLOADs of the same field
** count as conflicts.
*/
TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
{
  IRRef fref = fins->op1;  /* FREF reference. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRIns *xr = IR(fref);
  IRRef1 *refp = &J->chain[IR_FSTORE];  /* Chain link to patch on elimination. */
  IRRef ref = *refp;
  while (ref > fref) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_fref(J, xr, IR(store->op1))) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:
      /* String buffer fields have call side effects; never drop them. */
      if (store->op2 == val &&
	  !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
	return DROPFOLD;  /* Same value: drop the new store. */
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards or conflicting loads. */
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t) || (ir->o == IR_FLOAD && ir->op2 == xr->op2))
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;
	lj_ir_nop(store);
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
  613. /* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */
  614. int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
  615. {
  616. IRRef ref;
  617. if (J->chain[IR_BUFPUT] > lim)
  618. return 0; /* Conflict. */
  619. ref = J->chain[IR_CALLS];
  620. while (ref > lim) {
  621. IRIns *ir = IR(ref);
  622. if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
  623. return 0; /* Conflict. */
  624. ref = ir->prev;
  625. }
  626. ref = J->chain[IR_CALLL];
  627. while (ref > lim) {
  628. IRIns *ir = IR(ref);
  629. if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
  630. return 0; /* Conflict. */
  631. ref = ir->prev;
  632. }
  633. return 1; /* No conflict. Can safely FOLD/CSE. */
  634. }
  635. /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
  636. /* Find cdata allocation for a reference (if any). */
  637. static IRIns *aa_findcnew(jit_State *J, IRIns *ir)
  638. {
  639. while (ir->o == IR_ADD) {
  640. if (!irref_isk(ir->op1)) {
  641. IRIns *ir1 = aa_findcnew(J, IR(ir->op1)); /* Left-recursion. */
  642. if (ir1) return ir1;
  643. }
  644. if (irref_isk(ir->op2)) return NULL;
  645. ir = IR(ir->op2); /* Flatten right-recursion. */
  646. }
  647. return ir->o == IR_CNEW ? ir : NULL;
  648. }
  649. /* Alias analysis for two cdata allocations. */
  650. static AliasRet aa_cnew(jit_State *J, IRIns *refa, IRIns *refb)
  651. {
  652. IRIns *cnewa = aa_findcnew(J, refa);
  653. IRIns *cnewb = aa_findcnew(J, refb);
  654. if (cnewa == cnewb)
  655. return ALIAS_MAY; /* Same allocation or neither is an allocation. */
  656. if (cnewa && cnewb)
  657. return ALIAS_NO; /* Two different allocations never alias. */
  658. if (cnewb) { cnewa = cnewb; refb = refa; }
  659. return aa_escape(J, cnewa, refb);
  660. }
/* Alias analysis for XLOAD/XSTORE.
** refa is the address of xa; xb's address is taken from xb->op1.
** Disambiguates by base+offset decomposition, then by (strict) type
** rules, and finally by cdata allocation analysis.
*/
static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb)
{
  ptrdiff_t ofsa = 0, ofsb = 0;
  IRIns *refb = IR(xb->op1);
  IRIns *basea = refa, *baseb = refb;
  if (refa == refb && irt_sametype(xa->t, xb->t))
    return ALIAS_MUST;  /* Shortcut for same refs with identical type. */
  /* Offset-based disambiguation: split base+const into base and offset. */
  if (refa->o == IR_ADD && irref_isk(refa->op2)) {
    IRIns *irk = IR(refa->op2);
    basea = IR(refa->op1);
    ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
  }
  if (refb->o == IR_ADD && irref_isk(refb->op2)) {
    IRIns *irk = IR(refb->op2);
    baseb = IR(refb->op1);
    ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
  }
  /* Treat constified pointers like base vs. base+offset. */
  if (basea->o == IR_KPTR && baseb->o == IR_KPTR) {
    ofsb += (char *)ir_kptr(baseb) - (char *)ir_kptr(basea);
    baseb = basea;
  }
  /* This implements (very) strict aliasing rules.
  ** Different types do NOT alias, except for differences in signedness.
  ** Type punning through unions is allowed (but forces a reload).
  */
  if (basea == baseb) {
    ptrdiff_t sza = irt_size(xa->t), szb = irt_size(xb->t);
    if (ofsa == ofsb) {
      if (sza == szb && irt_isfp(xa->t) == irt_isfp(xb->t))
	return ALIAS_MUST;  /* Same-sized, same-kind. May need to convert. */
    } else if (ofsa + sza <= ofsb || ofsb + szb <= ofsa) {
      return ALIAS_NO;  /* Non-overlapping base+-o1 vs. base+-o2. */
    }
    /* NYI: extract, extend or reinterpret bits (int <-> fp). */
    return ALIAS_MAY;  /* Overlapping or type punning: force reload. */
  }
  /* Different bases: integer types that differ only in signedness may
  ** still alias (I8/U8, I16/U16, ... are adjacent in the IRT enum).
  */
  if (!irt_sametype(xa->t, xb->t) &&
      !(irt_typerange(xa->t, IRT_I8, IRT_U64) &&
	((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1))
    return ALIAS_NO;
  /* NYI: structural disambiguation. */
  return aa_cnew(J, basea, baseb);  /* Try to disambiguate allocations. */
}
  709. /* Return CSEd reference or 0. Caveat: swaps lower ref to the right! */
  710. static IRRef reassoc_trycse(jit_State *J, IROp op, IRRef op1, IRRef op2)
  711. {
  712. IRRef ref = J->chain[op];
  713. IRRef lim = op1;
  714. if (op2 > lim) { lim = op2; op2 = op1; op1 = lim; }
  715. while (ref > lim) {
  716. IRIns *ir = IR(ref);
  717. if (ir->op1 == op1 && ir->op2 == op2)
  718. return ref;
  719. ref = ir->prev;
  720. }
  721. return 0;
  722. }
/* Reassociate index references.
** Rewrites base+((idx+k)<<sh)+ofs style address chains so that the
** constant k is folded into the outer offset, reusing (CSE-only, never
** emitting) equivalent instructions. Returns the new ref or 0 on failure.
*/
static IRRef reassoc_xref(jit_State *J, IRIns *ir)
{
  ptrdiff_t ofs = 0;
  if (ir->o == IR_ADD && irref_isk(ir->op2)) {  /* Get constant offset. */
    IRIns *irk = IR(ir->op2);
    ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					   (ptrdiff_t)irk->i;
    ir = IR(ir->op1);
  }
  if (ir->o == IR_ADD) {  /* Add of base + index. */
    /* Index ref > base ref for loop-carried dependences. Only check op1. */
    IRIns *ir2, *ir1 = IR(ir->op1);
    int32_t shift = 0;
    IRRef idxref;
    /* Determine index shifts. Don't bother with IR_MUL here. */
    if (ir1->o == IR_BSHL && irref_isk(ir1->op2))
      shift = IR(ir1->op2)->i;
    else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2)
      shift = 1;  /* x+x is a doubling, i.e. a shift by 1. */
    else
      ir1 = ir;  /* No shift: the index is the add's operand itself. */
    ir2 = IR(ir1->op1);
    /* A non-reassociated add. Must be a loop-carried dependence. */
    if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2))
      ofs += (ptrdiff_t)IR(ir2->op2)->i << shift;  /* Fold k into the offset. */
    else
      return 0;
    idxref = ir2->op1;
    /* Try to CSE the reassociated chain. Give up if not found. */
    if (ir1 != ir &&
	!(idxref = reassoc_trycse(J, ir1->o, idxref,
				  ir1->o == IR_BSHL ? ir1->op2 : idxref)))
      return 0;
    if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2)))
      return 0;
    if (ofs != 0) {
      IRRef refk = tref_ref(lj_ir_kintp(J, ofs));
      if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk)))
	return 0;
    }
    return idxref;  /* Success, found a reassociated index reference. Phew. */
  }
  return 0;  /* Failure. */
}
/* XLOAD forwarding.
**
** Tries, in order:
**  1. Store-to-load forwarding from a must-aliasing XSTORE (with a CONV
**     rewrite if the stored type differs from the loaded type).
**  2. CSE against a previous XLOAD of the same address and type.
**  3. Reassociation of the index expression (a[i-1] across a loop) and
**     a retry of the store search with the reassociated reference.
** Falls through to EMITFOLD if none of these apply.
*/
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
{
  IRRef xref = fins->op1;  /* Address reference of the load. */
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;
  /* Read-only loads can't conflict with stores: go straight to CSE. */
  if ((fins->op2 & IRXLOAD_READONLY))
    goto cselim;
  /* Volatile loads must never be forwarded or CSEd. */
  if ((fins->op2 & IRXLOAD_VOLATILE))
    goto doemit;
  /* Search for conflicting stores. */
  ref = J->chain[IR_XSTORE];
retry:
  /* C calls and barriers bound the search: don't forward across them. */
  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
  if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR];
  while (ref > lim) {
    IRIns *store = IR(ref);
    switch (aa_xref(J, xr, fins, store)) {
    case ALIAS_NO: break;  /* Continue searching. */
    case ALIAS_MAY: lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST:
      /* Emit conversion if the loaded type doesn't match the forwarded type. */
      if (!irt_sametype(fins->t, IR(store->op2)->t)) {
	IRType dt = irt_type(fins->t), st = irt_type(IR(store->op2)->t);
	if (dt == IRT_I8 || dt == IRT_I16) {  /* Trunc + sign-extend. */
	  st = dt | IRCONV_SEXT;
	  dt = IRT_INT;
	} else if (dt == IRT_U8 || dt == IRT_U16) {  /* Trunc + zero-extend. */
	  st = dt;
	  dt = IRT_INT;
	}
	/* Rewrite the load in-place into a CONV of the stored value. */
	fins->ot = IRT(IR_CONV, dt);
	fins->op1 = store->op2;
	fins->op2 = (dt<<5)|st;  /* CONV mode operand: dest and src types. */
	return RETRYFOLD;
      }
      return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }
cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[IR_XLOAD];
  while (ref > lim) {
    /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
    if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t))
      return ref;
    ref = IR(ref)->prev;
  }
  /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */
  if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] &&
      xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) {
    ref = J->chain[IR_XSTORE];
    while (ref > lim)  /* Skip stores that have already been checked. */
      ref = IR(ref)->prev;
    lim = xref;
    xr = IR(xref);
    goto retry;  /* Retry with the reassociated reference. */
  }
doemit:
  return EMITFOLD;  /* No forwarding or CSE possible: emit the load. */
}
/* XSTORE elimination (dead-store elimination).
**
** Walks the XSTORE chain looking for a previous store to the same
** location. A must-aliasing store of the same value makes the new store
** redundant (DROPFOLD). A must-aliasing store of a different value is
** itself dead and is NOPed out, provided no guard or XLOAD intervenes
** and the LOOP marker is not crossed. Otherwise the new store is emitted.
*/
TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
{
  IRRef xref = fins->op1;  /* Address reference of the store. */
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRRef1 *refp = &J->chain[IR_XSTORE];  /* Link to patch when unlinking. */
  IRRef ref = *refp;
  /* C calls, barriers and XSNEW bound the search: don't eliminate across. */
  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
  if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR];
  if (J->chain[IR_XSNEW] > lim) lim = J->chain[IR_XSNEW];
  while (ref > lim) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_xref(J, xr, fins, store)) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards or any XLOADs (no AA performed). */
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t) || ir->o == IR_XLOAD)
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;
	lj_ir_nop(store);
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);  /* Advance, remembering the link. */
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
/* -- ASTORE/HSTORE previous type analysis -------------------------------- */

/* Check whether the previous value for a table store is non-nil.
** This can be derived either from a previous store or from a previous
** load (because all loads from tables perform a type check).
**
** The result of the analysis can be used to avoid the metatable check
** and the guard against HREF returning niltv. Both of these are cheap,
** so let's not spend too much effort on the analysis.
**
** A result of 1 is exact: previous value CANNOT be nil.
** A result of 0 is inexact: previous value MAY be nil.
**
** loadop is the load opcode for the slot kind (e.g. IR_ALOAD); the
** matching store chain is found via the opcode delta IRDELTA_L2S.
** xref is the xREF (address) of the table slot in question.
*/
int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref)
{
  /* First check stores. */
  IRRef ref = J->chain[loadop+IRDELTA_L2S];
  while (ref > xref) {
    IRIns *store = IR(ref);
    if (store->op1 == xref) {  /* Same xREF. */
      /* A nil store MAY alias, but a non-nil store MUST alias. */
      return !irt_isnil(store->t);
    } else if (irt_isnil(store->t)) {  /* Must check any nil store. */
      IRRef skref = IR(store->op1)->op2;  /* Key ref of the nil store. */
      IRRef xkref = IR(xref)->op2;  /* Key ref of our slot. */
      /* Same key type MAY alias. Need ALOAD check due to multiple int types. */
      if (loadop == IR_ALOAD || irt_sametype(IR(skref)->t, IR(xkref)->t)) {
	if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref))
	  return 0;  /* A nil store with same const key or var key MAY alias. */
	/* Different const keys CANNOT alias. */
      } else if (irt_isp32(IR(skref)->t) != irt_isp32(IR(xkref)->t)) {
	return 0;  /* HREF and HREFK MAY alias. */
      }  /* Different key types CANNOT alias. */
    }  /* Other non-nil stores MAY alias. */
    ref = store->prev;
  }
  /* Check loads since nothing could be derived from stores. */
  ref = J->chain[loadop];
  while (ref > xref) {
    IRIns *load = IR(ref);
    if (load->op1 == xref) {  /* Same xREF. */
      /* A nil load MAY alias, but a non-nil load MUST alias. */
      return !irt_isnil(load->t);
    }  /* Other non-nil loads MAY alias. */
    ref = load->prev;
  }
  return 0;  /* Nothing derived at all, previous value MAY be nil. */
}
  921. /* ------------------------------------------------------------------------ */
  922. #undef IR
  923. #undef fins
  924. #undef fleft
  925. #undef fright
  926. #endif