lj_target_arm64.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
/*
** Definitions for ARM64 CPUs.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
  5. #ifndef _LJ_TARGET_ARM64_H
  6. #define _LJ_TARGET_ARM64_H
  7. /* -- Registers IDs ------------------------------------------------------- */
  8. #define GPRDEF(_) \
  9. _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
  10. _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
  11. _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
  12. _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
  13. #define FPRDEF(_) \
  14. _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
  15. _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
  16. _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
  17. _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
  18. #define VRIDDEF(_)
  19. #define RIDENUM(name) RID_##name,
  20. enum {
  21. GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
  22. FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
  23. RID_MAX,
  24. RID_TMP = RID_LR,
  25. RID_ZERO = RID_SP,
  26. /* Calling conventions. */
  27. RID_RET = RID_X0,
  28. RID_RETLO = RID_X0,
  29. RID_RETHI = RID_X1,
  30. RID_FPRET = RID_D0,
  31. /* These definitions must match with the *.dasc file(s): */
  32. RID_BASE = RID_X19, /* Interpreter BASE. */
  33. RID_LPC = RID_X21, /* Interpreter PC. */
  34. RID_GL = RID_X22, /* Interpreter GL. */
  35. RID_LREG = RID_X23, /* Interpreter L. */
  36. /* Register ranges [min, max) and number of registers. */
  37. RID_MIN_GPR = RID_X0,
  38. RID_MAX_GPR = RID_SP+1,
  39. RID_MIN_FPR = RID_MAX_GPR,
  40. RID_MAX_FPR = RID_D31+1,
  41. RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
  42. RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
  43. };
  44. #define RID_NUM_KREF RID_NUM_GPR
  45. #define RID_MIN_KREF RID_X0
  46. /* -- Register sets ------------------------------------------------------- */
  47. /* Make use of all registers, except for x18, fp, lr and sp. */
  48. #define RSET_FIXED \
  49. (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\
  50. RID2RSET(RID_GL))
  51. #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
  52. #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
  53. #define RSET_ALL (RSET_GPR|RSET_FPR)
  54. #define RSET_INIT RSET_ALL
  55. /* lr is an implicit scratch register. */
  56. #define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
  57. #define RSET_SCRATCH_FPR \
  58. (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
  59. #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
  60. #define REGARG_FIRSTGPR RID_X0
  61. #define REGARG_LASTGPR RID_X7
  62. #define REGARG_NUMGPR 8
  63. #define REGARG_FIRSTFPR RID_D0
  64. #define REGARG_LASTFPR RID_D7
  65. #define REGARG_NUMFPR 8
  66. /* -- Spill slots --------------------------------------------------------- */
  67. /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
  68. **
  69. ** SPS_FIXED: Available fixed spill slots in interpreter frame.
  70. ** This definition must match with the vm_arm64.dasc file.
  71. ** Pre-allocate some slots to avoid sp adjust in every root trace.
  72. **
  73. ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
  74. */
  75. #define SPS_FIXED 4
  76. #define SPS_FIRST 2
  77. #define SPOFS_TMP 0
  78. #define sps_scale(slot) (4 * (int32_t)(slot))
  79. #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
  80. /* -- Exit state ---------------------------------------------------------- */
  81. /* This definition must match with the *.dasc file(s). */
  82. typedef struct {
  83. lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
  84. intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
  85. int32_t spill[256]; /* Spill slots. */
  86. } ExitState;
  87. /* Highest exit + 1 indicates stack check. */
  88. #define EXITSTATE_CHECKEXIT 1
  89. /* Return the address of a per-trace exit stub. */
  90. static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
  91. {
  92. while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
  93. return p + 3 + exitno;
  94. }
  95. /* Avoid dependence on lj_jit.h if only including lj_target.h. */
  96. #define exitstub_trace_addr(T, exitno) \
  97. exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
  98. /* -- Instructions -------------------------------------------------------- */
  99. /* ARM64 instructions are always little-endian. Swap for ARM64BE. */
  100. #if LJ_BE
  101. #define A64I_LE(x) (lj_bswap(x))
  102. #else
  103. #define A64I_LE(x) (x)
  104. #endif
  105. /* Instruction fields. */
  106. #define A64F_D(r) (r)
  107. #define A64F_N(r) ((r) << 5)
  108. #define A64F_A(r) ((r) << 10)
  109. #define A64F_M(r) ((r) << 16)
  110. #define A64F_IMMS(x) ((x) << 10)
  111. #define A64F_IMMR(x) ((x) << 16)
  112. #define A64F_U16(x) ((x) << 5)
  113. #define A64F_U12(x) ((x) << 10)
  114. #define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
  115. #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
  116. #define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
  117. #define A64F_S9(x) ((x) << 12)
  118. #define A64F_BIT(x) ((x) << 19)
  119. #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
  120. #define A64F_EX(ex) (A64I_EX | ((ex) << 13))
  121. #define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
  122. #define A64F_FP8(x) ((x) << 13)
  123. #define A64F_CC(cc) ((cc) << 12)
  124. #define A64F_LSL16(x) (((x) / 16) << 21)
  125. #define A64F_BSH(sh) ((sh) << 10)
  126. /* Check for valid field range. */
  127. #define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
  128. typedef enum A64Ins {
  129. A64I_S = 0x20000000,
  130. A64I_X = 0x80000000,
  131. A64I_EX = 0x00200000,
  132. A64I_ON = 0x00200000,
  133. A64I_K12 = 0x1a000000,
  134. A64I_K13 = 0x18000000,
  135. A64I_LS_U = 0x01000000,
  136. A64I_LS_S = 0x00800000,
  137. A64I_LS_R = 0x01200800,
  138. A64I_LS_SH = 0x00001000,
  139. A64I_LS_UXTWx = 0x00004000,
  140. A64I_LS_SXTWx = 0x0000c000,
  141. A64I_LS_SXTXx = 0x0000e000,
  142. A64I_LS_LSLx = 0x00006000,
  143. A64I_ADDw = 0x0b000000,
  144. A64I_ADDx = 0x8b000000,
  145. A64I_ADDSw = 0x2b000000,
  146. A64I_ADDSx = 0xab000000,
  147. A64I_NEGw = 0x4b0003e0,
  148. A64I_NEGx = 0xcb0003e0,
  149. A64I_SUBw = 0x4b000000,
  150. A64I_SUBx = 0xcb000000,
  151. A64I_SUBSw = 0x6b000000,
  152. A64I_SUBSx = 0xeb000000,
  153. A64I_MULw = 0x1b007c00,
  154. A64I_MULx = 0x9b007c00,
  155. A64I_SMULL = 0x9b207c00,
  156. A64I_ANDw = 0x0a000000,
  157. A64I_ANDx = 0x8a000000,
  158. A64I_ANDSw = 0x6a000000,
  159. A64I_ANDSx = 0xea000000,
  160. A64I_EORw = 0x4a000000,
  161. A64I_EORx = 0xca000000,
  162. A64I_ORRw = 0x2a000000,
  163. A64I_ORRx = 0xaa000000,
  164. A64I_TSTw = 0x6a00001f,
  165. A64I_TSTx = 0xea00001f,
  166. A64I_CMPw = 0x6b00001f,
  167. A64I_CMPx = 0xeb00001f,
  168. A64I_CMNw = 0x2b00001f,
  169. A64I_CMNx = 0xab00001f,
  170. A64I_CCMPw = 0x7a400000,
  171. A64I_CCMPx = 0xfa400000,
  172. A64I_CSELw = 0x1a800000,
  173. A64I_CSELx = 0x9a800000,
  174. A64I_ASRw = 0x13007c00,
  175. A64I_ASRx = 0x9340fc00,
  176. A64I_LSLx = 0xd3400000,
  177. A64I_LSRx = 0xd340fc00,
  178. A64I_SHRw = 0x1ac02000,
  179. A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */
  180. A64I_REVw = 0x5ac00800,
  181. A64I_REVx = 0xdac00c00,
  182. A64I_EXTRw = 0x13800000,
  183. A64I_EXTRx = 0x93c00000,
  184. A64I_BFMw = 0x33000000,
  185. A64I_BFMx = 0xb3400000,
  186. A64I_SBFMw = 0x13000000,
  187. A64I_SBFMx = 0x93400000,
  188. A64I_SXTBw = 0x13001c00,
  189. A64I_SXTHw = 0x13003c00,
  190. A64I_SXTW = 0x93407c00,
  191. A64I_UBFMw = 0x53000000,
  192. A64I_UBFMx = 0xd3400000,
  193. A64I_UXTBw = 0x53001c00,
  194. A64I_UXTHw = 0x53003c00,
  195. A64I_MOVw = 0x2a0003e0,
  196. A64I_MOVx = 0xaa0003e0,
  197. A64I_MVNw = 0x2a2003e0,
  198. A64I_MVNx = 0xaa2003e0,
  199. A64I_MOVKw = 0x72800000,
  200. A64I_MOVKx = 0xf2800000,
  201. A64I_MOVZw = 0x52800000,
  202. A64I_MOVZx = 0xd2800000,
  203. A64I_MOVNw = 0x12800000,
  204. A64I_MOVNx = 0x92800000,
  205. A64I_ADR = 0x10000000,
  206. A64I_ADRP = 0x90000000,
  207. A64I_LDRB = 0x39400000,
  208. A64I_LDRH = 0x79400000,
  209. A64I_LDRw = 0xb9400000,
  210. A64I_LDRx = 0xf9400000,
  211. A64I_LDRLw = 0x18000000,
  212. A64I_LDRLx = 0x58000000,
  213. A64I_STRB = 0x39000000,
  214. A64I_STRH = 0x79000000,
  215. A64I_STRw = 0xb9000000,
  216. A64I_STRx = 0xf9000000,
  217. A64I_STPw = 0x29000000,
  218. A64I_STPx = 0xa9000000,
  219. A64I_LDPw = 0x29400000,
  220. A64I_LDPx = 0xa9400000,
  221. A64I_B = 0x14000000,
  222. A64I_BCC = 0x54000000,
  223. A64I_BL = 0x94000000,
  224. A64I_BR = 0xd61f0000,
  225. A64I_BLR = 0xd63f0000,
  226. A64I_TBZ = 0x36000000,
  227. A64I_TBNZ = 0x37000000,
  228. A64I_CBZ = 0x34000000,
  229. A64I_CBNZ = 0x35000000,
  230. A64I_BRAAZ = 0xd61f081f,
  231. A64I_BLRAAZ = 0xd63f081f,
  232. A64I_NOP = 0xd503201f,
  233. /* FP */
  234. A64I_FADDd = 0x1e602800,
  235. A64I_FSUBd = 0x1e603800,
  236. A64I_FMADDd = 0x1f400000,
  237. A64I_FMSUBd = 0x1f408000,
  238. A64I_FNMADDd = 0x1f600000,
  239. A64I_FNMSUBd = 0x1f608000,
  240. A64I_FMULd = 0x1e600800,
  241. A64I_FDIVd = 0x1e601800,
  242. A64I_FNEGd = 0x1e614000,
  243. A64I_FABS = 0x1e60c000,
  244. A64I_FSQRTd = 0x1e61c000,
  245. A64I_LDRs = 0xbd400000,
  246. A64I_LDRd = 0xfd400000,
  247. A64I_STRs = 0xbd000000,
  248. A64I_STRd = 0xfd000000,
  249. A64I_LDPs = 0x2d400000,
  250. A64I_LDPd = 0x6d400000,
  251. A64I_STPs = 0x2d000000,
  252. A64I_STPd = 0x6d000000,
  253. A64I_FCMPd = 0x1e602000,
  254. A64I_FCMPZd = 0x1e602008,
  255. A64I_FCSELd = 0x1e600c00,
  256. A64I_FRINTMd = 0x1e654000,
  257. A64I_FRINTPd = 0x1e64c000,
  258. A64I_FRINTZd = 0x1e65c000,
  259. A64I_FCVT_F32_F64 = 0x1e624000,
  260. A64I_FCVT_F64_F32 = 0x1e22c000,
  261. A64I_FCVT_F32_S32 = 0x1e220000,
  262. A64I_FCVT_F64_S32 = 0x1e620000,
  263. A64I_FCVT_F32_U32 = 0x1e230000,
  264. A64I_FCVT_F64_U32 = 0x1e630000,
  265. A64I_FCVT_F32_S64 = 0x9e220000,
  266. A64I_FCVT_F64_S64 = 0x9e620000,
  267. A64I_FCVT_F32_U64 = 0x9e230000,
  268. A64I_FCVT_F64_U64 = 0x9e630000,
  269. A64I_FCVT_S32_F64 = 0x1e780000,
  270. A64I_FCVT_S32_F32 = 0x1e380000,
  271. A64I_FCVT_U32_F64 = 0x1e790000,
  272. A64I_FCVT_U32_F32 = 0x1e390000,
  273. A64I_FCVT_S64_F64 = 0x9e780000,
  274. A64I_FCVT_S64_F32 = 0x9e380000,
  275. A64I_FCVT_U64_F64 = 0x9e790000,
  276. A64I_FCVT_U64_F32 = 0x9e390000,
  277. A64I_FMOV_S = 0x1e204000,
  278. A64I_FMOV_D = 0x1e604000,
  279. A64I_FMOV_R_S = 0x1e260000,
  280. A64I_FMOV_S_R = 0x1e270000,
  281. A64I_FMOV_R_D = 0x9e660000,
  282. A64I_FMOV_D_R = 0x9e670000,
  283. A64I_FMOV_DI = 0x1e601000,
  284. } A64Ins;
  285. #define A64I_BR_AUTH (LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR)
  286. #define A64I_BLR_AUTH (LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR)
  287. typedef enum A64Shift {
  288. A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
  289. } A64Shift;
  290. typedef enum A64Extend {
  291. A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX,
  292. A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX,
  293. } A64Extend;
  294. /* ARM condition codes. */
  295. typedef enum A64CC {
  296. CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
  297. CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
  298. CC_HS = CC_CS, CC_LO = CC_CC
  299. } A64CC;
  300. #endif