lj_target_x86.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /*
  2. ** Definitions for x86 and x64 CPUs.
  3. ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
  4. */
  5. #ifndef _LJ_TARGET_X86_H
  6. #define _LJ_TARGET_X86_H
  /* -- Register IDs -------------------------------------------------------- */

  /*
  ** X-macro register lists. Each list applies its argument macro to every
  ** register name of one class, in the order written here. The x64 build
  ** lists 16 registers per class, the x86 build lists 8.
  */
  #if LJ_64
  #define GPRDEF(X) \
    X(EAX) X(ECX) X(EDX) X(EBX) X(ESP) X(EBP) X(ESI) X(EDI) \
    X(R8D) X(R9D) X(R10D) X(R11D) X(R12D) X(R13D) X(R14D) X(R15D)
  #define FPRDEF(X) \
    X(XMM0) X(XMM1) X(XMM2) X(XMM3) X(XMM4) X(XMM5) X(XMM6) X(XMM7) \
    X(XMM8) X(XMM9) X(XMM10) X(XMM11) X(XMM12) X(XMM13) X(XMM14) X(XMM15)
  #else
  #define GPRDEF(X) \
    X(EAX) X(ECX) X(EDX) X(EBX) X(ESP) X(EBP) X(ESI) X(EDI)
  #define FPRDEF(X) \
    X(XMM0) X(XMM1) X(XMM2) X(XMM3) X(XMM4) X(XMM5) X(XMM6) X(XMM7)
  #endif

  /* Names of the virtual (pseudo) registers. */
  #define VRIDDEF(X) \
    X(MRM) X(RIP)

  /* Turns one register name into an enumerator, including the comma. */
  #define RIDENUM(reg)  RID_##reg,

  enum {
    /* Real registers: all GPRs first, all FPRs directly after them. */
    GPRDEF(RIDENUM)
    FPRDEF(RIDENUM)
    RID_MAX,                      /* One past the last real register. */

    /* Pseudo-ids, located at and above RID_MAX. */
    RID_MRM = RID_MAX,            /* ModRM operand. */
    RID_RIP = RID_MAX+5,          /* RIP (x64 only), rm bits = 5. */

    /* Register ranges [min, max) and number of registers per class. */
    RID_MIN_GPR = RID_EAX,
    RID_MIN_FPR = RID_XMM0,
    RID_MAX_GPR = RID_MIN_FPR,    /* The GPR range ends where the FPRs start. */
    RID_MAX_FPR = RID_MAX,
    RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
    RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,

    /* Calling conventions. */
    RID_SP = RID_ESP,
    RID_RET = RID_EAX,
  #if LJ_64
    RID_FPRET = RID_XMM0,
  #endif
    RID_RETLO = RID_EAX,
    RID_RETHI = RID_EDX,

    /* These definitions must match with the *.dasc file(s): */
    RID_BASE = RID_EDX,           /* Interpreter BASE. */
  #if !LJ_64 || LJ_ABI_WIN
    RID_LPC = RID_ESI,            /* Interpreter PC. */
    RID_DISPATCH = RID_EBX,       /* Interpreter DISPATCH table. */
  #else
    RID_LPC = RID_EBX,            /* Interpreter PC. */
    RID_DISPATCH = RID_R14D,      /* Interpreter DISPATCH table. */
  #endif
  };
  /* -- Register sets ------------------------------------------------------- */

  /* All FPRs. */
  #define RSET_FPR \
    (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))

  /*
  ** All GPRs, except the stack pointer (and maybe DISPATCH): the DISPATCH
  ** term is multiplied by LJ_GC64, so it only removes that register when
  ** LJ_GC64 is non-zero.
  */
  #define RSET_GPR \
    (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
     - RID2RSET(RID_ESP) \
     - LJ_GC64*RID2RSET(RID_DISPATCH))

  /* Everything above combined; also the initial register set. */
  #define RSET_ALL      (RSET_GPR|RSET_FPR)
  #define RSET_INIT     RSET_ALL

  /* GPR set for 8 bit operands (judging by the name -- confirm at use sites). */
  #if !LJ_64
  /* x86: the range EAX up to and including EBX. */
  #define RSET_GPR8     (RSET_RANGE(RID_EAX, RID_EBX+1))
  #else
  /* x64: same as RSET_GPR. Note: this requires the use of FORCE_REX! */
  #define RSET_GPR8     RSET_GPR
  #endif
  /* ABI-specific register sets. */

  /* EAX, ECX and EDX: part of the scratch set of every ABI below. */
  #define RSET_ACD      (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))

  /*
  ** Definitions per ABI:
  **   RSET_SCRATCH      Scratch register set (presumably the registers a
  **                     call may clobber -- confirm against the callers).
  **   REGARG_GPRS       GPR argument registers, packed 5 bits per register
  **                     with the first register in the lowest bits.
  **   REGARG_NUMGPR     Number of GPR argument registers.
  **   REGARG_NUMFPR     Number of FPR argument registers.
  **   REGARG_FIRSTFPR   First FPR argument register (x64 only).
  **   REGARG_LASTFPR    Last FPR argument register (x64 only).
  **   STACKARG_OFS      Offset of the first stack argument.
  */
  #if LJ_64 && LJ_ABI_WIN

  /* Windows x64 ABI. */
  #define RSET_SCRATCH \
    (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
  #define REGARG_GPRS \
    (RID_ECX | (RID_EDX<<5) | (RID_R8D<<10) | (RID_R9D<<15))
  #define REGARG_NUMGPR         4
  #define REGARG_NUMFPR         4
  #define REGARG_FIRSTFPR       RID_XMM0
  #define REGARG_LASTFPR        RID_XMM3
  #define STACKARG_OFS          (4*8)

  #elif LJ_64

  /* The rest of the civilized x64 world has a common ABI. */
  #define RSET_SCRATCH \
    (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
  #define REGARG_GPRS \
    (RID_EDI | (RID_ESI<<5) | (RID_EDX<<10) | (RID_ECX<<15) | \
     (RID_R8D<<20) | (RID_R9D<<25))
  #define REGARG_NUMGPR         6
  #define REGARG_NUMFPR         8
  #define REGARG_FIRSTFPR       RID_XMM0
  #define REGARG_LASTFPR        RID_XMM7
  #define STACKARG_OFS          0

  #else

  /* Common x86 ABI. */
  #define RSET_SCRATCH          (RSET_ACD|RSET_FPR)
  #define REGARG_GPRS           (RID_ECX | (RID_EDX<<5))  /* Fastcall only. */
  #define REGARG_NUMGPR         2                         /* Fastcall only. */
  #define REGARG_NUMFPR         0
  #define STACKARG_OFS          0

  #endif
  #if LJ_64
  /*
  ** Prefer the low 8 regs of each type to reduce REX prefixes.
  **
  ** Assuming lj_bswap reverses the four bytes of the set and lj_fls returns
  ** the index of the highest set bit (both are defined elsewhere -- confirm),
  ** the XOR with 0x18 maps the byte-swapped bit index back to the original
  ** one, so the result is the highest register within the lowest non-empty
  ** group of 8.
  */
  #undef rset_picktop_
  #define rset_picktop_(set)    (0x18 ^ lj_fls(lj_bswap(set)))
  #endif
  /* -- Spill slots --------------------------------------------------------- */

  /*
  ** A spill slot is 32 bit wide. FPRs use an even/odd pair of slots.
  **
  ** SPS_FIXED: Available fixed spill slots in interpreter frame.
  **            This definition must match with the *.dasc file(s).
  **
  ** SPS_FIRST: First spill slot for general use. At least two 32 bit slots
  **            are reserved.
  */
  #if LJ_64 && LJ_ABI_WIN

  #define SPS_FIXED     (4*2)
  #define SPS_FIRST     (4*2)   /* Don't use callee register save area. */

  #elif LJ_64

  #if LJ_GC64
  #define SPS_FIXED     2
  #else
  #define SPS_FIXED     4
  #endif
  #define SPS_FIRST     2

  #else

  #define SPS_FIXED     6
  #define SPS_FIRST     2

  #endif

  /* Offset of the temporary area (name-based reading; confirm at use sites). */
  #define SPOFS_TMP     0

  /* Byte offset of slot number n: four bytes per slot. */
  #define sps_scale(n)  (4 * (int32_t)(n))

  /* Rounds (n - SPS_FIXED) up to the next multiple of 4. */
  #define sps_align(n)  (((n) - SPS_FIXED + 3) & ~3)
  136. /* -- Exit state ---------------------------------------------------------- */
  137. /* This definition must match with the *.dasc file(s). */
  138. typedef struct {
  139. lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
  140. intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
  141. int32_t spill[256]; /* Spill slots. */
  142. } ExitState;
  /*
  ** Exit stub layout. The group size is limited by the range of a short
  ** forward jump (127): (2+2)*(32-1)-2 = 122.
  */
  #define EXITSTUB_SPACING      (2+2)   /* Distance between stubs, in bytes. */
  #define EXITSTUBS_PER_GROUP   32      /* Number of stubs in one group. */

  /* g->vmstate has traceno on exit. */
  #define EXITTRACE_VMSTATE     1
  /* -- x86 ModRM operand encoding ------------------------------------------ */

  /*
  ** Values for the top two bits of a byte (see XM_MASK). The XM_OFS* and
  ** XM_REG names and the XM_SCALE* names share the same four bit patterns.
  */
  typedef enum {
    /* Addressing mode: no offset, 8 bit offset, 32 bit offset, register. */
    XM_OFS0   = 0x00,
    XM_OFS8   = 0x40,
    XM_OFS32  = 0x80,
    XM_REG    = 0xc0,

    /* Index scale factor: 1, 2, 4 or 8. */
    XM_SCALE1 = 0x00,
    XM_SCALE2 = 0x40,
    XM_SCALE4 = 0x80,
    XM_SCALE8 = 0xc0,

    /* Mask covering the two bits used by all of the above. */
    XM_MASK   = 0xc0
  } x86Mode;
  /*
  ** Variable ModRM operand: an offset plus an optional base register and an
  ** optional scaled index register.
  */
  typedef struct {
    int32_t ofs;    /* Offset. */
    uint8_t base;   /* Base register or RID_NONE. */
    uint8_t idx;    /* Index register or RID_NONE. */
    uint8_t scale;  /* Index scale (XM_SCALE1 .. XM_SCALE8). */
  } x86ModRM;
  /* -- Opcodes ------------------------------------------------------------- */

  /*
  ** Macros to construct variable-length x86 opcodes, packed into a uint32_t.
  ** The last opcode byte is in bits 24-31, with any prefix/escape bytes in
  ** the bytes below it. -(len+1) is in LSB: 0xfe for one opcode byte, 0xfd
  ** for two, 0xfc for three.
  **
  ** The opcode bytes are converted to uint32_t before they are shifted.
  ** Shifting a plain int constant >= 0x80 left by 24 (as in XO_(8b) or
  ** XO_(ff)) yields a value that does not fit into a signed int, which is
  ** undefined behavior in C. The resulting values are unchanged and all
  ** macros still expand to integer constant expressions.
  */
  #define XO_(o) \
    ((uint32_t)(0x0000feu + ((uint32_t)0x##o<<24)))
  #define XO_FPU(a,b) \
    ((uint32_t)(0x00fdu + ((uint32_t)0x##a<<16) + ((uint32_t)0x##b<<24)))
  #define XO_0f(o) \
    ((uint32_t)(0x0f00fdu + ((uint32_t)0x##o<<24)))
  #define XO_66(o) \
    ((uint32_t)(0x6600fdu + ((uint32_t)0x##o<<24)))
  #define XO_660f(o) \
    ((uint32_t)(0x0f66fcu + ((uint32_t)0x##o<<24)))
  #define XO_f20f(o) \
    ((uint32_t)(0x0ff2fcu + ((uint32_t)0x##o<<24)))
  #define XO_f30f(o) \
    ((uint32_t)(0x0ff3fcu + ((uint32_t)0x##o<<24)))

  /*
  ** VEX-encoded opcodes. The LSB holds 0xc4 (the three-byte VEX escape)
  ** instead of a length, followed by two template bytes for the opcode map
  ** and implied prefix named by the macro. The opcode byte is in bits 24-31.
  */
  #define XV_660f38(o) \
    ((uint32_t)(0x79e2c4u + ((uint32_t)0x##o<<24)))
  #define XV_f20f38(o) \
    ((uint32_t)(0x7be2c4u + ((uint32_t)0x##o<<24)))
  #define XV_f20f3a(o) \
    ((uint32_t)(0x7be3c4u + ((uint32_t)0x##o<<24)))
  #define XV_f30f38(o) \
    ((uint32_t)(0x7ae2c4u + ((uint32_t)0x##o<<24)))
  173. /* This list of x86 opcodes is not intended to be complete. Opcodes are only
  174. ** included when needed. Take a look at DynASM or jit.dis_x86 to see the
  175. ** whole mess.
  176. */
  177. typedef enum {
  178. /* Fixed length opcodes. XI_* prefix. */
  179. XI_O16 = 0x66,
  180. XI_NOP = 0x90,
  181. XI_XCHGa = 0x90,
  182. XI_CALL = 0xe8,
  183. XI_JMP = 0xe9,
  184. XI_JMPs = 0xeb,
  185. XI_PUSH = 0x50, /* Really 50+r. */
  186. XI_JCCs = 0x70, /* Really 7x. */
  187. XI_JCCn = 0x80, /* Really 0f8x. */
  188. XI_LEA = 0x8d,
  189. XI_MOVrib = 0xb0, /* Really b0+r. */
  190. XI_MOVri = 0xb8, /* Really b8+r. */
  191. XI_ARITHib = 0x80,
  192. XI_ARITHi = 0x81,
  193. XI_ARITHi8 = 0x83,
  194. XI_PUSHi8 = 0x6a,
  195. XI_TESTb = 0x84,
  196. XI_TEST = 0x85,
  197. XI_INT3 = 0xcc,
  198. XI_MOVmi = 0xc7,
  199. XI_GROUP5 = 0xff,
  200. /* Note: little-endian byte-order! */
  201. XI_FLDZ = 0xeed9,
  202. XI_FLD1 = 0xe8d9,
  203. XI_FDUP = 0xc0d9, /* Really fld st0. */
  204. XI_FPOP = 0xd8dd, /* Really fstp st0. */
  205. XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
  206. XI_FRNDINT = 0xfcd9,
  207. XI_FSCALE = 0xfdd9,
  208. XI_FYL2X = 0xf1d9,
  209. /* VEX-encoded instructions. XV_* prefix. */
  210. XV_RORX = XV_f20f3a(f0),
  211. XV_SARX = XV_f30f38(f7),
  212. XV_SHLX = XV_660f38(f7),
  213. XV_SHRX = XV_f20f38(f7),
  214. /* Variable-length opcodes. XO_* prefix. */
  215. XO_OR = XO_(0b),
  216. XO_MOV = XO_(8b),
  217. XO_MOVto = XO_(89),
  218. XO_MOVtow = XO_66(89),
  219. XO_MOVtob = XO_(88),
  220. XO_MOVmi = XO_(c7),
  221. XO_MOVmib = XO_(c6),
  222. XO_LEA = XO_(8d),
  223. XO_ARITHib = XO_(80),
  224. XO_ARITHi = XO_(81),
  225. XO_ARITHi8 = XO_(83),
  226. XO_ARITHiw8 = XO_66(83),
  227. XO_SHIFTi = XO_(c1),
  228. XO_SHIFT1 = XO_(d1),
  229. XO_SHIFTcl = XO_(d3),
  230. XO_IMUL = XO_0f(af),
  231. XO_IMULi = XO_(69),
  232. XO_IMULi8 = XO_(6b),
  233. XO_CMP = XO_(3b),
  234. XO_TESTb = XO_(84),
  235. XO_TEST = XO_(85),
  236. XO_GROUP3b = XO_(f6),
  237. XO_GROUP3 = XO_(f7),
  238. XO_GROUP5b = XO_(fe),
  239. XO_GROUP5 = XO_(ff),
  240. XO_MOVZXb = XO_0f(b6),
  241. XO_MOVZXw = XO_0f(b7),
  242. XO_MOVSXb = XO_0f(be),
  243. XO_MOVSXw = XO_0f(bf),
  244. XO_MOVSXd = XO_(63),
  245. XO_BSWAP = XO_0f(c8),
  246. XO_CMOV = XO_0f(40),
  247. XO_MOVSD = XO_f20f(10),
  248. XO_MOVSDto = XO_f20f(11),
  249. XO_MOVSS = XO_f30f(10),
  250. XO_MOVSSto = XO_f30f(11),
  251. XO_MOVLPD = XO_660f(12),
  252. XO_MOVAPS = XO_0f(28),
  253. XO_XORPS = XO_0f(57),
  254. XO_ANDPS = XO_0f(54),
  255. XO_ADDSD = XO_f20f(58),
  256. XO_SUBSD = XO_f20f(5c),
  257. XO_MULSD = XO_f20f(59),
  258. XO_DIVSD = XO_f20f(5e),
  259. XO_SQRTSD = XO_f20f(51),
  260. XO_MINSD = XO_f20f(5d),
  261. XO_MAXSD = XO_f20f(5f),
  262. XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
  263. XO_UCOMISD = XO_660f(2e),
  264. XO_CVTSI2SD = XO_f20f(2a),
  265. XO_CVTTSD2SI= XO_f20f(2c),
  266. XO_CVTSI2SS = XO_f30f(2a),
  267. XO_CVTTSS2SI= XO_f30f(2c),
  268. XO_CVTSS2SD = XO_f30f(5a),
  269. XO_CVTSD2SS = XO_f20f(5a),
  270. XO_ADDSS = XO_f30f(58),
  271. XO_MOVD = XO_660f(6e),
  272. XO_MOVDto = XO_660f(7e),
  273. XO_FLDd = XO_(d9), XOg_FLDd = 0,
  274. XO_FLDq = XO_(dd), XOg_FLDq = 0,
  275. XO_FILDd = XO_(db), XOg_FILDd = 0,
  276. XO_FILDq = XO_(df), XOg_FILDq = 5,
  277. XO_FSTPd = XO_(d9), XOg_FSTPd = 3,
  278. XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
  279. XO_FISTPq = XO_(df), XOg_FISTPq = 7,
  280. XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
  281. XO_FADDq = XO_(dc), XOg_FADDq = 0,
  282. XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
  283. XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7
  284. } x86Op;
  /* x86 opcode groups. */
  typedef uint32_t x86Group;

  /*
  ** Packs a group descriptor: opcode i8 in bits 16-23, opcode i in bits
  ** 8-15 and the group number grp in the low bits. XG_ARITHi() does this
  ** for the XI_ARITHi8/XI_ARITHi opcode pair.
  */
  #define XG_(i8, i, grp) \
    ((x86Group)(((i8) << 16) + ((i) << 8) + (grp)))
  #define XG_ARITHi(grp)        XG_(XI_ARITHi8, XI_ARITHi, grp)

  /*
  ** Turn a group descriptor into a one-byte x86Op (LSB 0xfe): XG_TOXOi()
  ** moves bits 8-15 of the descriptor into the opcode byte in bits 24-31,
  ** XG_TOXOi8() moves bits 16-23 there.
  */
  #define XG_TOXOi(grpdesc) \
    ((x86Op)(0x000000fe + (((grpdesc)<<16) & 0xff000000)))
  #define XG_TOXOi8(grpdesc) \
    ((x86Op)(0x000000fe + (((grpdesc)<<8) & 0xff000000)))

  /*
  ** Opcode for arithmetic operation number op: the opcode byte in bits
  ** 24-31 is 0x03 + 8*op. XO_ARITHw() additionally carries a 0x66 byte
  ** below the opcode byte and uses LSB 0xfd.
  */
  #define XO_ARITH(op)          ((x86Op)(0x030000fe + ((op)<<27)))
  #define XO_ARITHw(op)         ((x86Op)(0x036600fd + ((op)<<27)))
  /*
  ** Arithmetic operation numbers. XOg_X_IMUL (8) lies outside the 0..7 range
  ** of the other entries (presumably a pseudo-op that gets special treatment
  ** -- confirm against the assembler backend).
  */
  typedef enum {
    XOg_ADD    = 0,
    XOg_OR     = 1,
    XOg_ADC    = 2,
    XOg_SBB    = 3,
    XOg_AND    = 4,
    XOg_SUB    = 5,
    XOg_XOR    = 6,
    XOg_CMP    = 7,
    XOg_X_IMUL = 8
  } x86Arith;
  /* Shift and rotate operation numbers. */
  typedef enum {
    XOg_ROL = 0,
    XOg_ROR = 1,
    XOg_RCL = 2,
    XOg_RCR = 3,
    XOg_SHL = 4,
    XOg_SHR = 5,
    XOg_SAL = 6,
    XOg_SAR = 7
  } x86Shift;
  /* Operation numbers of opcode group 3 (see XO_GROUP3/XO_GROUP3b). */
  typedef enum {
    XOg_TEST  = 0,
    XOg_TEST_ = 1,
    XOg_NOT   = 2,
    XOg_NEG   = 3,
    XOg_MUL   = 4,
    XOg_IMUL  = 5,
    XOg_DIV   = 6,
    XOg_IDIV  = 7
  } x86Group3;
  /* Operation numbers of opcode group 5 (see XO_GROUP5/XO_GROUP5b). */
  typedef enum {
    XOg_INC     = 0,
    XOg_DEC     = 1,
    XOg_CALL    = 2,
    XOg_CALLfar = 3,
    XOg_JMP     = 4,
    XOg_JMPfar  = 5,
    XOg_PUSH    = 6
  } x86Group5;
  /*
  ** x86 condition codes. The sixteen primary codes are numbered 0..15 in
  ** the order listed; the remaining names are aliases for them.
  */
  typedef enum {
    /* Primary codes. */
    CC_O   = 0,
    CC_NO  = 1,
    CC_B   = 2,
    CC_NB  = 3,
    CC_E   = 4,
    CC_NE  = 5,
    CC_BE  = 6,
    CC_NBE = 7,
    CC_S   = 8,
    CC_NS  = 9,
    CC_P   = 10,
    CC_NP  = 11,
    CC_L   = 12,
    CC_NL  = 13,
    CC_LE  = 14,
    CC_NLE = 15,

    /* Aliases. */
    CC_C   = CC_B,
    CC_NAE = CC_C,
    CC_NC  = CC_NB,
    CC_AE  = CC_NB,
    CC_Z   = CC_E,
    CC_NZ  = CC_NE,
    CC_NA  = CC_BE,
    CC_A   = CC_NBE,
    CC_PE  = CC_P,
    CC_PO  = CC_NP,
    CC_NGE = CC_L,
    CC_GE  = CC_NL,
    CC_NG  = CC_LE,
    CC_G   = CC_NLE
  } x86CC;
  315. #endif