  1. /*
  2. ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
  3. ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
  4. */
  5. #define lj_opt_split_c
  6. #define LUA_CORE
  7. #include "lj_obj.h"
  8. #if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
  9. #include "lj_err.h"
  10. #include "lj_buf.h"
  11. #include "lj_ir.h"
  12. #include "lj_jit.h"
  13. #include "lj_ircall.h"
  14. #include "lj_iropt.h"
  15. #include "lj_dispatch.h"
  16. #include "lj_vm.h"
  17. /* SPLIT pass:
  18. **
  19. ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
  20. ** instructions. It's only active for soft-float targets or for 32 bit CPUs
  21. ** which lack native 64 bit integer operations (the FFI is currently the
  22. ** only emitter for 64 bit integer instructions).
  23. **
  24. ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
  25. ** backend simple. Only a small amount of extra functionality needs to be
  26. ** implemented. This is much easier than adding support for allocating
  27. ** register pairs to each backend (believe me, I tried). A few simple, but
  28. ** important optimizations can be performed by the SPLIT pass, which would
  29. ** be tedious to do in the backend.
  30. **
  31. ** The basic idea is to replace each 64 bit IR instruction with its 32 bit
  32. ** equivalent plus an extra HIOP instruction. The splitted IR is not passed
  33. ** through FOLD or any other optimizations, so each HIOP is guaranteed to
  34. ** immediately follow it's counterpart. The actual functionality of HIOP is
  35. ** inferred from the previous instruction.
  36. **
  37. ** The operands of HIOP hold the hiword input references. The output of HIOP
  38. ** is the hiword output reference, which is also used to hold the hiword
  39. ** register or spill slot information. The register allocator treats this
  40. ** instruction independently of any other instruction, which improves code
  41. ** quality compared to using fixed register pairs.
  42. **
  43. ** It's easier to split up some instructions into two regular 32 bit
  44. ** instructions. E.g. XLOAD is split up into two XLOADs with two different
  45. ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
  46. ** constants, too. Some hiword instructions can be entirely omitted, e.g.
  47. ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
  48. ** are split up into two 32 bit arguments each.
  49. **
  50. ** On soft-float targets, floating-point instructions are directly converted
  51. ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
  52. ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
  53. **
  54. ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
  55. ** two int64_t fields:
  56. **
  57. ** 0100 p32 ADD base +8
  58. ** 0101 i64 XLOAD 0100
  59. ** 0102 i64 ADD 0101 +1
  60. ** 0103 p32 ADD base +16
  61. ** 0104 i64 XSTORE 0103 0102
  62. **
  63. ** mov rax, [esi+0x8]
  64. ** add rax, +0x01
  65. ** mov [esi+0x10], rax
  66. **
  67. ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
  68. **
  69. ** 0100 p32 ADD base +8
  70. ** 0101 int XLOAD 0100
  71. ** 0102 p32 ADD base +12
  72. ** 0103 int XLOAD 0102
  73. ** 0104 int ADD 0101 +1
  74. ** 0105 int HIOP 0103 +0
  75. ** 0106 p32 ADD base +16
  76. ** 0107 int XSTORE 0106 0104
  77. ** 0108 int HIOP 0106 0105
  78. **
  79. ** mov eax, [esi+0x8]
  80. ** mov ecx, [esi+0xc]
  81. ** add eax, +0x01
  82. ** adc ecx, +0x00
  83. ** mov [esi+0x10], eax
  84. ** mov [esi+0x14], ecx
  85. **
  86. ** You may notice the reassociated hiword address computation, which is
  87. ** later fused into the mov operands by the assembler.
  88. */
  89. /* Some local macros to save typing. Undef'd at the end. */
  90. #define IR(ref) (&J->cur.ir[(ref)])
  91. /* Directly emit the transformed IR without updating chains etc. */
  92. static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
  93. {
  94. IRRef nref = lj_ir_nextins(J);
  95. IRIns *ir = IR(nref);
  96. ir->ot = ot;
  97. ir->op1 = op1;
  98. ir->op2 = op2;
  99. return nref;
  100. }
#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
  /* Pass the two 32 bit halves of the number in memory order. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Convert the integer result back to a number and compare both
    ** halves against the original input, guarding against an inexact
    ** (non-integral) conversion.
    */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;  /* Undo the copied instruction; replace it by the call. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  /* The trailing HIOP carries the hiword of the 64 bit call result. */
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the copied instruction; replace it by the call. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  /* The trailing HIOP carries the hiword of the 64 bit call result. */
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the copied instruction; replace it by the call. */
  /* Chain the four 32 bit halves of both arguments as CARGs in memory order. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  /* Result HIOP is soft-float typed only for number results (soft-float). */
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
  170. /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
  171. static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
  172. {
  173. IRRef nref = oir[ref].prev;
  174. IRIns *ir = IR(nref);
  175. int32_t ofs = 4;
  176. if (ir->o == IR_KPTR)
  177. return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  178. if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
  179. /* Reassociate address. */
  180. ofs += IR(ir->op2)->i;
  181. nref = ir->op1;
  182. if (ofs == 0) return nref;
  183. }
  184. return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
  185. }
#if LJ_HASFFI
/* Split a 64 bit shift/rotate: strength-reduce constant shifts to 32 bit
** ops, otherwise fall back to a runtime call.
*/
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);  /* ror(x, k) == rol(x, 64-k). */
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }  /* Word swap. */
      if (k == 0) {
      passthrough:
	J->cur.nins--;
	ir->prev = lo;
	return hi;
      } else {
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
	/* 64 bit rotate from four 32 bit shifts plus two ORs. */
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	IRRef t1 = ir->prev, t2;
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	nir->o = IR_BSHR;  /* Loword shift is always logical. */
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);
      }
    } else {  /* k in [32,63]: one word shifts out completely. */
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;
	else
	  lo = ir->prev;
	ir->prev = lj_ir_kint(J, 0);  /* New loword is zero. */
	return lo;
      } else {
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	if (k == 32) {
	  J->cur.nins--;
	  ir->prev = hi;
	} else {
	  nir->op1 = hi;
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);  /* Logical shift: hiword is zero. */
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  /* Variable shift amounts are handled by a runtime call. */
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
/* Split a 64 bit bitwise AND/OR/XOR, folding constant 0/-1 operands. */
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      /* After negating for BAND, k == 0 uniformly means "pass through". */
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;  /* Operand unchanged (x&-1, x|0, x^0). */
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;  /* x ^ -1 ==> ~x. */
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;  /* Result is the constant (x&0, x|-1). */
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	return hi;  /* Hiword unchanged. */
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;  /* Hiword result is the constant. */
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
#endif
  296. /* Substitute references of a snapshot. */
  297. static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
  298. {
  299. SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  300. MSize n, nent = snap->nent;
  301. for (n = 0; n < nent; n++) {
  302. SnapEntry sn = map[n];
  303. IRIns *ir = &oir[snap_ref(sn)];
  304. if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
  305. map[n] = ((sn & 0xffff0000) | ir->prev);
  306. }
  307. }
/* Transform the old IR to the new IR.
**
** The old IR is copied to a scratch buffer; loword substitutions are kept
** in the copied instructions' prev fields, hiword substitutions in the
** hisubst table. The current trace is then rebuilt from scratch with the
** split 32 bit instructions.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* One scratch buffer holds both the IR copy and the hisubst table. */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;  /* 64 bit constants occupy two IR slots. */
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      /* Fix up snapshot references as we cross each snapshot boundary. */
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);
	break;
      case IR_FPMATH:
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	/* NEG flips, ABS clears the sign bit in the hiword only. */
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1],
			lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
	hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
	nir->op2 += LJ_BE*4;
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
	}
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lj_assertJ(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
		   "bad source type for CONV");
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
	}
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
		   "bad IR op %d", ir->o);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	/* Byte-swap swaps the two words, too. */
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
	}
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      /* Prepend the hiword of the first argument as an extra CARG. */
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	int carg = 0;
	IRIns *cir;
	/* Count preceding CARGs to find the argument slot parity. */
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. Fix up the PHI instructions emitted at the end above. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
  766. /* Protected callback for split pass. */
  767. static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
  768. {
  769. jit_State *J = (jit_State *)ud;
  770. split_ir(J);
  771. UNUSED(L); UNUSED(dummy);
  772. return NULL;
  773. }
  774. #if defined(LUA_USE_ASSERT) || LJ_SOFTFP
  775. /* Slow, but sure way to check whether a SPLIT pass is needed. */
  776. static int split_needsplit(jit_State *J)
  777. {
  778. IRIns *ir, *irend;
  779. IRRef ref;
  780. for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
  781. if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
  782. return 1;
  783. if (LJ_SOFTFP) {
  784. for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
  785. if ((IR(ref)->op2 & IRSLOAD_CONVERT))
  786. return 1;
  787. if (J->chain[IR_TOBIT])
  788. return 1;
  789. }
  790. for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
  791. IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
  792. if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
  793. st == IRT_I64 || st == IRT_U64)
  794. return 1;
  795. }
  796. return 0; /* Nope. */
  797. }
  798. #endif
  799. /* SPLIT pass. */
  800. void lj_opt_split(jit_State *J)
  801. {
  802. #if LJ_SOFTFP
  803. if (!J->needsplit)
  804. J->needsplit = split_needsplit(J);
  805. #else
  806. lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
  807. #endif
  808. if (J->needsplit) {
  809. int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
  810. if (errcode) {
  811. /* Completely reset the trace to avoid inconsistent dump on abort. */
  812. J->cur.nins = J->cur.nk = REF_BASE;
  813. J->cur.nsnap = 0;
  814. lj_err_throw(J->L, errcode); /* Propagate errors. */
  815. }
  816. }
  817. }
  818. #undef IR
  819. #endif