lj_opt_split.c

/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
  35. **
  36. ** The operands of HIOP hold the hiword input references. The output of HIOP
  37. ** is the hiword output reference, which is also used to hold the hiword
  38. ** register or spill slot information. The register allocator treats this
  39. ** instruction independently of any other instruction, which improves code
  40. ** quality compared to using fixed register pairs.
  41. **
  42. ** It's easier to split up some instructions into two regular 32 bit
  43. ** instructions. E.g. XLOAD is split up into two XLOADs with two different
  44. ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
  45. ** constants, too. Some hiword instructions can be entirely omitted, e.g.
  46. ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
  47. ** are split up into two 32 bit arguments each.
  48. **
  49. ** On soft-float targets, floating-point instructions are directly converted
  50. ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
  51. ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
  52. **
  53. ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
  54. ** two int64_t fields:
  55. **
  56. ** 0100 p32 ADD base +8
  57. ** 0101 i64 XLOAD 0100
  58. ** 0102 i64 ADD 0101 +1
  59. ** 0103 p32 ADD base +16
  60. ** 0104 i64 XSTORE 0103 0102
  61. **
  62. ** mov rax, [esi+0x8]
  63. ** add rax, +0x01
  64. ** mov [esi+0x10], rax
  65. **
  66. ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
  67. **
  68. ** 0100 p32 ADD base +8
  69. ** 0101 int XLOAD 0100
  70. ** 0102 p32 ADD base +12
  71. ** 0103 int XLOAD 0102
  72. ** 0104 int ADD 0101 +1
  73. ** 0105 int HIOP 0103 +0
  74. ** 0106 p32 ADD base +16
  75. ** 0107 int XSTORE 0106 0104
  76. ** 0108 int HIOP 0106 0105
  77. **
  78. ** mov eax, [esi+0x8]
  79. ** mov ecx, [esi+0xc]
  80. ** add eax, +0x01
  81. ** adc ecx, +0x00
  82. ** mov [esi+0x10], eax
  83. ** mov [esi+0x14], ecx
  84. **
  85. ** You may notice the reassociated hiword address computation, which is
  86. ** later fused into the mov operands by the assembler.
  87. */
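
/* Illustrative sketch (not part of the pass): the ADD/HIOP pair in the
** example above encodes plain 32 bit carry propagation, roughly:
**
**   uint32_t lo = a_lo + b_lo;                 (0104 int ADD)
**   uint32_t hi = a_hi + b_hi + (lo < a_lo);   (0105 int HIOP -> x86 adc)
**
** The functionality of each HIOP is inferred from the instruction it
** immediately follows, as described above.
*/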

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)  (&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
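  /* For a checked conversion, convert the integer result back to a number
  ** and compare both words against the original; the guarded EQ/HIOP pair
  ** makes the trace exit if the value was not an exact integer. */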
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
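  /* Drop the instruction provisionally copied by the main loop; it is
  ** replaced by the CARG/CALLN chain emitted below. */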
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}

#if LJ_HASFFI
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
                            IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
        J->cur.nins--;
        ir->prev = lo;
        return hi;
      } else {
        TRef k1, k2;
        IRRef t1, t2, t3, t4;
        J->cur.nins--;
        k1 = lj_ir_kint(J, k);
        k2 = lj_ir_kint(J, (-k & 31));
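        /* 64 bit rotate-left by k (0 < k < 32) built from 32 bit shifts:
        ** lo' = (lo << k) | (hi >> (32-k)), hi' = (hi << k) | (lo >> (32-k)).
        ** BROR and k >= 32 were canonicalized to this form above. */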
        t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
        t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
        t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
        t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
        return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
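      /* For 0 < k < 32 only the bits crossing the word boundary need an
      ** extra OR term:
      **   BSHL:      lo' = lo << k,  hi' = (hi << k) | (lo >> (32-k))
      **   BSHR/BSAR: lo' = (lo >> k) | (hi << (32-k)),  hi' = hi shifted by k
      */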
      if (op == IR_BSHL) {
        IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
        IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
        return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
        IRRef t1 = ir->prev, t2;
        lua_assert(op == IR_BSHR || op == IR_BSAR);
        nir->o = IR_BSHR;
        t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
        return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
        if (k == 32)
          J->cur.nins--;
        else
          lo = ir->prev;
        ir->prev = lj_ir_kint(J, 0);
        return lo;
      } else {
        lua_assert(op == IR_BSHR || op == IR_BSAR);
        if (k == 32) {
          J->cur.nins--;
          ir->prev = hi;
        } else {
          nir->op1 = hi;
        }
        if (op == IR_BSHR)
          return lj_ir_kint(J, 0);
        else
          return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
                       op - IR_BSHL + IRCALL_lj_carith_shl64);
}

static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
                         IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
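    /* Constants 0 and -1 either absorb the operation or pass the other
    ** operand through: BAND 0 -> 0, BAND -1 -> pass; BOR 0 -> pass,
    ** BOR -1 -> -1; BXOR 0 -> pass, BXOR -1 -> BNOT. The BAND case is
    ** handled by complementing k first. */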
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        J->cur.nins--;
        ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
        nir->o = IR_BNOT;
        nir->op2 = 0;
      } else {
        J->cur.nins--;
        ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        return hi;
      } else if (op == IR_BXOR) {
        return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
        return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
#endif

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
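    /* Rewrite each entry to its loword substitution. Constant soft-float
    ** numbers are left alone: IR constants are retained by the pass, so
    ** their original KNUM refs stay valid. */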
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;
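  /* oir and hisubst are now biased by nk, so both can be indexed directly
  ** with IR references; the hiword slots follow the copied instructions in
  ** the same temporary buffer. */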

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;
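
    /* When this ref reaches the next snapshot's position, retarget the
    ** snapshot to the new instruction and substitute its slot refs. */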
    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        /* Try to rejoin pow from EXP2, MUL and LOG2. */
        if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
          IRIns *irp = IR(nir->op1);
          if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
            IRIns *irm4 = IR(irp->op1);
            IRIns *irm3 = IR(irm4->op1);
            IRIns *irm12 = IR(irm3->op1);
            IRIns *irl1 = IR(irm12->op1);
            if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                irl1->op2 == IRCALL_lj_vm_log2) {
              IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
              IRRef arg3 = irm3->op2, arg4 = irm4->op2;
              J->cur.nins--;
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
              ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
              hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
              break;
            }
          }
        }
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_ATAN2:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
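      /* Soft-float NEG/ABS only affect the sign bit, which lives in the
      ** hiword: XOR resp. AND with the hiword of the (presumably sign-mask)
      ** constant operand; the loword is passed through via a no-op CONV. */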
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
        inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lua_assert(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_BNOT:
        hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
        break;
      case IR_BSWAP:
        ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
        hi = nref;
        break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
        hi = split_bitop(J, hisubst, nir, ir);
        break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
        hi = split_bitshift(J, hisubst, oir, nir, ir);
        break;
      case IR_FLOAD:
        lua_assert(ir->op2 == IRFL_CDATA_INT64);
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. But fwd both parts. */
          hi = hiref;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
      }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
      }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lua_assert(st == IRT_NUM && irt_isint(ir->t));
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
            (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
            (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
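        /* Non-x86 calling conventions typically pass 64 bit arguments in
        ** aligned (even/odd) register pairs or 8-byte-aligned stack slots,
        ** so pad the CARG chain with a dummy REF_NIL slot when needed. */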
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
        return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
        st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif