  1. /*
  2. ** ARM instruction emitter.
  3. ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
  4. */
  5. /* -- Constant encoding --------------------------------------------------- */
/*
** Inverse-instruction table, indexed by the 4 bit data-processing opcode
** (instruction bits 21-24). A non-zero entry is the XOR difference
** (pre-shifted down by 21) that converts an instruction into its
** arithmetic/logical inverse (ADD<->SUB, AND<->BIC, MOV<->MVN, ...),
** which lets emit_isk12() encode the negated or complemented constant
** instead. A zero entry means no usable inverse exists for that opcode.
*/
static uint8_t emit_invai[16] = {
  /* AND */ (ARMI_AND^ARMI_BIC) >> 21,
  /* EOR */ 0,
  /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21,
  /* RSB */ 0,
  /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21,
  /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21,
  /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21,
  /* RSC */ 0,
  /* TST */ 0,
  /* TEQ */ 0,
  /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21,
  /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21,
  /* ORR */ 0,
  /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21,
  /* BIC */ (ARMI_BIC^ARMI_AND) >> 21,
  /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21
};
/* Encode constant in K12 format for data processing instructions. */
static uint32_t emit_isk12(ARMIns ai, int32_t n)
{
  uint32_t invai, i, m = (uint32_t)n;
  /* K12: unsigned 8 bit value, rotated in steps of two bits. */
  for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2))
    if (m <= 255) return ARMI_K12|m|i;  /* i carries the rotate-count field. */
  /* Otherwise try negation/complement with the inverse instruction. */
  invai = emit_invai[((ai >> 21) & 15)];
  if (!invai) return 0;  /* Failed. No inverse instruction. */
  m = ~(uint32_t)n;  /* Complement suffices for logical inverses (BIC/MVN). */
  if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) ||
      invai == (ARMI_CMP^ARMI_CMN) >> 21) m++;  /* Arithmetic inverses need -n == ~n+1. */
  for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2))
    if (m <= 255) return ARMI_K12|(invai<<21)|m|i;  /* Also patch in the inverse opcode. */
  return 0;  /* Failed. */
}
  41. /* -- Emit basic instructions --------------------------------------------- */
  42. static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
  43. {
  44. *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm);
  45. }
  46. static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
  47. {
  48. *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm);
  49. }
  50. static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
  51. {
  52. *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn);
  53. }
  54. static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm)
  55. {
  56. *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm);
  57. }
  58. static void emit_d(ASMState *as, ARMIns ai, Reg rd)
  59. {
  60. *--as->mcp = ai | ARMF_D(rd);
  61. }
  62. static void emit_n(ASMState *as, ARMIns ai, Reg rn)
  63. {
  64. *--as->mcp = ai | ARMF_N(rn);
  65. }
  66. static void emit_m(ASMState *as, ARMIns ai, Reg rm)
  67. {
  68. *--as->mcp = ai | ARMF_M(rm);
  69. }
/*
** Emit an extended load/store (LDRD/STRD/halfword forms) with an 8 bit
** immediate offset, split into two nibbles (imm4H:imm4L). A negative
** offset stores its magnitude and leaves the U (add) bit clear.
*/
static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
  lj_assertA(ofs >= -255 && ofs <= 255,
	     "load/store offset %d out of range", ofs);
  if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
  *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
	       ((ofs & 0xf0) << 4) | (ofs & 0x0f);
}
/*
** Emit LDR/STR with a 12 bit immediate offset.
** Opportunistically fuses with the previously emitted (i.e. following in
** execution order) instruction into a single LDRD/STRD.
*/
static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
  lj_assertA(ofs >= -4095 && ofs <= 4095,
	     "load/store offset %d out of range", ofs);
  /* Combine LDR/STR pairs to LDRD/STRD. */
  /* Conditions: previous instruction is the partner LDR/STR of rd^1 at
  ** ofs^4 on the same base, ai is a plain LDR or STR, base != data reg,
  ** offset is word-aligned in [0,252] with reg/offset parity matching
  ** the even/odd pair, and we don't fuse across the loop entry (mcloop).
  */
  if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
      (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
      (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) &&
      as->mcp != as->mcloop) {
    as->mcp++;  /* Drop the partner instruction, replace with one LDRD/STRD. */
    emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd&~1, rn, ofs&~4);
    return;
  }
  if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;  /* U bit = add offset. */
  *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs;
}
#if !LJ_SOFTFP
/*
** Emit a VFP load/store (VLDR/VSTR). The immediate is an 8 bit word
** count, so ofs must be a multiple of 4 in [-1020,1020]; it is scaled
** down by 2 bits for the encoding.
*/
static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
  lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0,
	     "load/store offset %d out of range", ofs);
  if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;  /* U bit = add offset. */
  *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
}
#endif
  103. /* -- Emit loads/stores --------------------------------------------------- */
/* Prefer spills of BASE/L. */
/* References below ASMREF_L are constants that can be rematerialized. */
#define emit_canremat(ref)	((ref) < ASMREF_L)
/* Try to find a one step delta relative to another constant. */
static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
{
  RegSet work = ~as->freeset & RSET_GPR;  /* Scan all allocated GPRs. */
  while (work) {
    Reg r = rset_picktop(work);
    IRRef ref = regcost_ref(as->cost[r]);
    lj_assertA(r != d, "dest reg not free");
    if (emit_canremat(ref)) {
      /* r holds a known constant: is i reachable with one ADD/SUB/MOV? */
      int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
      uint32_t k = emit_isk12(ARMI_ADD, delta);
      if (k) {
	if (k == ARMI_K12)
	  emit_dm(as, ARMI_MOV, d, r);  /* Zero delta: plain register move. */
	else
	  emit_dn(as, ARMI_ADD^k, d, r);  /* k may flip ADD to SUB via XOR. */
	return 1;
      }
    }
    rset_clear(work, r);
  }
  return 0;  /* Failed. */
}
/* Try to find a two step delta relative to another constant. */
static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
{
  RegSet work = ~as->freeset & RSET_GPR;  /* Scan all allocated GPRs. */
  while (work) {
    Reg r = rset_picktop(work);
    IRRef ref = regcost_ref(as->cost[r]);
    lj_assertA(r != rd, "dest reg %d not free", rd);
    if (emit_canremat(ref)) {
      int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
      if (other) {
	int32_t delta = i - other;
	uint32_t sh, inv = 0, k2, k;
	/* Work with the magnitude; flip ADD to SUB for a negative delta. */
	if (delta < 0) { delta = (int32_t)(~(uint32_t)delta+1u); inv = ARMI_ADD^ARMI_SUB; }
	sh = lj_ffs(delta) & ~1;  /* Even shift: K12 rotates in 2 bit steps. */
	k2 = emit_isk12(0, delta & (255 << sh));   /* Lowest 8 bit chunk. */
	k = emit_isk12(0, delta & ~(255 << sh));   /* Remaining upper part. */
	if (k) {
	  /* Emitted backwards: executes as rd = r op k, then rd = rd op k2. */
	  emit_dn(as, ARMI_ADD^k2^inv, rd, rd);
	  emit_dn(as, ARMI_ADD^k^inv, rd, r);
	  return 1;
	}
      }
    }
    rset_clear(work, r);
  }
  return 0;  /* Failed. */
}
/*
** Load a 32 bit constant into a GPR.
** Tries increasingly expensive strategies: single K12 MOV, MOVW,
** one-step delta from a live constant, MOVW+MOVT pair, two-step delta,
** and finally a MOV plus up to three ORRs of 8 bit chunks.
*/
static void emit_loadi(ASMState *as, Reg rd, int32_t i)
{
  uint32_t k = emit_isk12(ARMI_MOV, i);
  lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
	     "dest reg %d not free", rd);
  if (k) {
    /* Standard K12 constant. */
    emit_d(as, ARMI_MOV^k, rd);
  } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
    /* 16 bit loword constant for ARMv6T2. */
    emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
  } else if (emit_kdelta1(as, rd, i)) {
    /* One step delta relative to another constant. */
  } else if ((as->flags & JIT_F_ARMV6T2)) {
    /* 32 bit hiword/loword constant for ARMv6T2. */
    /* Emitted backwards: MOVW executes first, then MOVT sets the hiword. */
    emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
    emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
  } else if (emit_kdelta2(as, rd, i)) {
    /* Two step delta relative to another constant. */
  } else {
    /* Otherwise construct the constant with up to 4 instructions. */
    /* NYI: use mvn+bic, use pc-relative loads. */
    for (;;) {
      uint32_t sh = lj_ffs(i) & ~1;  /* Even shift for 2 bit rotate steps. */
      int32_t m = i & (255 << sh);   /* Peel off the lowest 8 bit chunk. */
      i &= ~(255 << sh);
      if (i == 0) {
	emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);  /* Last chunk executes first. */
	break;
      }
      /* Emitted backwards: ORR in remaining chunks after the initial MOV. */
      emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
    }
  }
}
/* Load the 32 bit address of a constant object into rd. */
#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
/* Forward declaration for the constant register allocator (defined elsewhere). */
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
  194. /* Get/set from constant pointer. */
  195. static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
  196. {
  197. int32_t i = i32ptr(p);
  198. emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)),
  199. (i & 4095));
  200. }
#if !LJ_SOFTFP
/* Load a number constant into an FPR. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  cTValue *tv = ir_knum(ir);
  int32_t i;
  if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
    /* Try the VFPv3 VMOV 8 bit immediate form: only usable when the low
    ** mantissa word is zero and the exponent is in the narrow range
    ** selected by the b == 0x100/0x0ff check below.
    */
    uint32_t hi = tv->u32.hi;
    uint32_t b = ((hi >> 22) & 0x1ff);
    if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) {
      /* Scatter sign, exponent and mantissa bits into the VMOVI fields. */
      *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) |
		   ((tv->u32.hi >> 12) & 0x00080000) |
		   ((tv->u32.hi >> 4) & 0x00070000) |
		   ((tv->u32.hi >> 16) & 0x0000000f);
      return;
    }
  }
  /* Fallback: VLDR from the constant's address, split into base+offset. */
  i = i32ptr(tv);
  emit_vlso(as, ARMI_VLDR_D, r,
	    ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020));
}
#endif
/* Get/set global_State fields via an absolute address load/store. */
#define emit_getgl(as, r, field) \
  emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)
#define emit_setgl(as, r, field) \
  emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field)
/* Trace number is determined from pc of exit instruction. */
#define emit_setvmstate(as, i)	UNUSED(i)
  230. /* -- Emit control-flow instructions -------------------------------------- */
/* Label for internal jumps. */
typedef MCode *MCLabel;
/* Return label pointing to current PC (code grows downwards from mcp). */
#define emit_label(as)		((as)->mcp)
  235. static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
  236. {
  237. MCode *p = as->mcp;
  238. ptrdiff_t delta = (target - p) - 1;
  239. lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range");
  240. *--p = ai | ((uint32_t)delta & 0x00ffffffu);
  241. as->mcp = p;
  242. }
/* Unconditional jump to target. */
#define emit_jmp(as, target) emit_branch(as, ARMI_B, (target))
/*
** Emit a call to an absolute target.
** Uses BL/BLX for in-range targets; odd addresses indicate a Thumb
** target and take the BLX form with the halfword bit. Out-of-range
** targets go through a register-indirect BLX.
*/
static void emit_call(ASMState *as, void *target)
{
  MCode *p = --as->mcp;
  ptrdiff_t delta = ((char *)target - (char *)p) - 8;  /* PC reads 8 bytes ahead. */
  if ((((delta>>2) + 0x00800000) >> 24) == 0) {  /* Within +-32 MB? */
    if ((delta & 1))
      /* Thumb target: BLX with bit 1 of delta stored as the H bit. */
      *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23);
    else
      *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu);
  } else {  /* Target out of range: need indirect call. But don't use R0-R3. */
    Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1));
    *p = ARMI_BLXr | ARMF_M(r);
  }
}
  258. /* -- Emit generic operations --------------------------------------------- */
/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
#if LJ_SOFTFP
  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
  if (dst >= RID_MAX_GPR) {
    /* FPR move: double or single precision, selected by the IR type. */
    emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
	    (dst & 15), (src & 15));
    return;
  }
#endif
  if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
    /* Peephole on the previously emitted (later-executing) instruction:
    ** if it is a load/store that uses dst as base (N field) or as the
    ** stored register (D field of a store), rewrite it to use src, since
    ** dst only gets its value from the MOV emitted below.
    */
    MCode ins = *as->mcp, swp = (src^dst);
    if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) {
      if (!((ins ^ (dst << 16)) & 0x000f0000))
	*as->mcp = ins ^ (swp << 16);  /* Swap N in load/store. */
      if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000))
	*as->mcp = ins ^ (swp << 12);  /* Swap D in store. */
    }
  }
  emit_dm(as, ARMI_MOV, dst, src);
}
/* Generic load of register with base and (small) offset address. */
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
#if LJ_SOFTFP
  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
  if (r >= RID_MAX_GPR)
    /* FPR: VFP load, double or single precision per the IR type. */
    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
  else
#endif
    emit_lso(as, ARMI_LDR, r, base, ofs);
}
/* Generic store of register with base and (small) offset address. */
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
#if LJ_SOFTFP
  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
  if (r >= RID_MAX_GPR)
    /* FPR: VFP store, double or single precision per the IR type. */
    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
  else
#endif
    emit_lso(as, ARMI_STR, r, base, ofs);
}
  306. /* Emit an arithmetic/logic operation with a constant operand. */
  307. static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src,
  308. int32_t i, RegSet allow)
  309. {
  310. uint32_t k = emit_isk12(ai, i);
  311. if (k)
  312. emit_dn(as, ai^k, dest, src);
  313. else
  314. emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
  315. }
  316. /* Add offset to pointer. */
  317. static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
  318. {
  319. if (ofs)
  320. emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r));
  321. }
/* Subtract ofs from the stack pointer (negative ofs grows the frame back). */
#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))