winabi.c

#include "all.h"
#include <stdbool.h>

typedef enum ArgPassStyle {
  APS_Invalid = 0,
  APS_Register,
  APS_InlineOnStack,
  APS_CopyAndPointerInRegister,
  APS_CopyAndPointerOnStack,
  APS_VarargsTag,
  APS_EnvTag,
} ArgPassStyle;

typedef struct ArgClass {
  Typ* type;
  ArgPassStyle style;
  int align;
  uint size;
  int cls;
  Ref ref;
} ArgClass;

typedef struct ExtraAlloc ExtraAlloc;
struct ExtraAlloc {
  Ins instr;
  ExtraAlloc* link;
};

#define ALIGN_DOWN(n, a) ((n) & ~((a)-1))
#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a))
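// For example, ALIGN_UP(13, 8) expands to ((13 + 7) & ~7) == 16, and
// ALIGN_UP(16, 8) stays 16; `a` must be a power of two.
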
// Number of stack bytes required to be reserved for the callee.
#define SHADOW_SPACE_SIZE 32

int amd64_winabi_rsave[] = {RCX,  RDX,   R8,    R9,    R10,   R11,   RAX,  XMM0,
                            XMM1, XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7, XMM8,
                            XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1};
int amd64_winabi_rclob[] = {RBX, R12, R13, R14, R15, RSI, RDI, -1};

MAKESURE(winabi_arrays_ok,
         sizeof amd64_winabi_rsave == (NGPS_WIN + NFPS + 1) * sizeof(int) &&
         sizeof amd64_winabi_rclob == (NCLR_WIN + 1) * sizeof(int));

// layout of call's second argument (RCall)
//
// bit 0: rax returned
// bit 1: xmm0 returned
// bits 2..3: 0
// bits 4..7: rcx, rdx, r8, r9 passed
// bits 8..b: xmm0, xmm1, xmm2, xmm3 passed
// bit c: env call (rax passed)
// bits d..1f: 0
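//
// For example, a call f(int, double) returning an int encodes as bit 0 (rax
// returned) | bit 4 (rcx: first arg) | bit 9 (xmm1: second arg), i.e. 0x211.
// Note the double occupies xmm1 rather than xmm0, because the int/float
// registers share a single position counter on win x64.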
bits amd64_winabi_retregs(Ref r, int p[2]) {
  assert(rtype(r) == RCall);
  bits b = 0;
  int num_int_returns = r.val & 1;
  int num_float_returns = (r.val >> 1) & 1;
  // Lowering always marks exactly one of rax/xmm0 as returned, so anything
  // that isn't an integer return is a float return.
  if (num_int_returns == 1) {
    b |= BIT(RAX);
  } else {
    b |= BIT(XMM0);
  }
  if (p) {
    p[0] = num_int_returns;
    p[1] = num_float_returns;
  }
  return b;
}

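// E.g. for the 0x211 encoding above, amd64_winabi_retregs returns BIT(RAX)
// and sets p = {1, 0}: one integer register returned, no float register.
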
static uint popcnt(bits b) {
  b = (b & 0x5555555555555555) + ((b >> 1) & 0x5555555555555555);
  b = (b & 0x3333333333333333) + ((b >> 2) & 0x3333333333333333);
  b = (b & 0x0f0f0f0f0f0f0f0f) + ((b >> 4) & 0x0f0f0f0f0f0f0f0f);
  b += (b >> 8);
  b += (b >> 16);
  b += (b >> 32);
  return b & 0xff;
}

bits amd64_winabi_argregs(Ref r, int p[2]) {
  assert(rtype(r) == RCall);
  // On SysV, these are counts. Here, a count isn't sufficient: we actually
  // need to know which registers are in use, because they're not necessarily
  // contiguous.
  int int_passed = (r.val >> 4) & 15;
  int float_passed = (r.val >> 8) & 15;
  bool env_param = (r.val >> 12) & 1;
  bits b = 0;
  b |= (int_passed & 1) ? BIT(RCX) : 0;
  b |= (int_passed & 2) ? BIT(RDX) : 0;
  b |= (int_passed & 4) ? BIT(R8) : 0;
  b |= (int_passed & 8) ? BIT(R9) : 0;
  b |= (float_passed & 1) ? BIT(XMM0) : 0;
  b |= (float_passed & 2) ? BIT(XMM1) : 0;
  b |= (float_passed & 4) ? BIT(XMM2) : 0;
  b |= (float_passed & 8) ? BIT(XMM3) : 0;
  b |= env_param ? BIT(RAX) : 0;
  if (p) {
    // TODO: The only place this is used is live.c. It's not clear what should
    // be returned here with respect to using the same counter for int/float
    // regs on win. For now, report the number of registers in use, even
    // though they're not contiguous.
    p[0] = popcnt(int_passed);
    p[1] = popcnt(float_passed);
  }
  return b;
}

typedef struct RegisterUsage {
  // Single counter shared by int and float registers, as they're counted
  // together. A register is only *actually* needed for a value (i.e. needs
  // to be saved, etc.) if its bool is set in regs_passed.
  int num_regs_passed;
  // Indexed first by 0=int, 1=float; use KBASE(cls).
  // Indexed second by register index in the calling convention, so for
  // integer, 0=RCX, 1=RDX, 2=R8, 3=R9, and for float, XMM0..XMM3.
  bool regs_passed[2][4];
  bool rax_returned;
  bool xmm0_returned;
  // This is also where va_start will start for varargs functions (there's no
  // 'Oparv', so we need to keep track of a count here).
  int num_named_args_passed;
  // Set when classifying the arguments for a call (but not when classifying
  // the parameters of a function definition).
  bool is_varargs_call;
  bool has_env;
} RegisterUsage;

static int register_usage_to_call_arg_value(RegisterUsage reg_usage) {
  return (reg_usage.rax_returned << 0) |        //
         (reg_usage.xmm0_returned << 1) |       //
         (reg_usage.regs_passed[0][0] << 4) |   //
         (reg_usage.regs_passed[0][1] << 5) |   //
         (reg_usage.regs_passed[0][2] << 6) |   //
         (reg_usage.regs_passed[0][3] << 7) |   //
         (reg_usage.regs_passed[1][0] << 8) |   //
         (reg_usage.regs_passed[1][1] << 9) |   //
         (reg_usage.regs_passed[1][2] << 10) |  //
         (reg_usage.regs_passed[1][3] << 11) |  //
         (reg_usage.has_env << 12);
}

// Assigns the argument to a register if any are left according to the calling
// convention, and updates the regs_passed bools. Otherwise, marks the value
// as needing stack space to be passed.
static void assign_register_or_stack(RegisterUsage* reg_usage,
                                     ArgClass* arg,
                                     bool is_float,
                                     bool by_copy) {
  if (reg_usage->num_regs_passed == 4) {
    arg->style = by_copy ? APS_CopyAndPointerOnStack : APS_InlineOnStack;
  } else {
    reg_usage->regs_passed[is_float][reg_usage->num_regs_passed] = true;
    ++reg_usage->num_regs_passed;
    arg->style = by_copy ? APS_CopyAndPointerInRegister : APS_Register;
  }
  ++reg_usage->num_named_args_passed;
}

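// Because the counter is shared, e.g. the third argument always lands in the
// third register position (R8 or XMM2) regardless of the types of the first
// two, and any argument past the fourth goes to the stack.
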
static bool type_is_by_copy(Typ* type) {
  // Only these exact sizes are passed in a register; even though e.g. a
  // 5-byte struct would "fit", it is still passed by copy-and-pointer.
  return type->isdark || (type->size != 1 && type->size != 2 &&
                          type->size != 4 && type->size != 8);
}

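// So, for instance, struct { int a, b; } (size 8) is passed directly, while
// struct { char c[5]; } (size 5) and any opaque ("dark") type are first
// copied and then passed as a pointer to the copy.
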
// This function is used for both arguments and parameters. begin_instr should
// point at the first Oarg or Opar, and end_instr should point past the last
// one (so to the Ocall for arguments, or to the first 'real' instruction of
// the function for parameters).
static void classify_arguments(RegisterUsage* reg_usage,
                               Ins* begin_instr,
                               Ins* end_instr,
                               ArgClass* arg_classes,
                               Ref* env) {
  ArgClass* arg = arg_classes;
  // For each argument, determine how it will be passed (int, float, stack)
  // and update the `reg_usage` counts. Additionally, fill out arg_classes for
  // each argument.
  for (Ins* instr = begin_instr; instr < end_instr; ++instr, ++arg) {
    switch (instr->op) {
      case Oarg:
      case Opar:
        assign_register_or_stack(reg_usage, arg, KBASE(instr->cls),
                                 /*by_copy=*/false);
        arg->cls = instr->cls;
        arg->align = 3;
        arg->size = 8;
        break;
      case Oargc:
      case Oparc: {
        int typ_index = instr->arg[0].val;
        Typ* type = &typ[typ_index];
        bool by_copy = type_is_by_copy(type);
        assign_register_or_stack(reg_usage, arg, /*is_float=*/false, by_copy);
        arg->cls = Kl;
        if (!by_copy && type->size <= 4) {
          arg->cls = Kw;
        }
        arg->align = 3;
        arg->size = type->size;
        break;
      }
      case Oarge:
        *env = instr->arg[0];
        arg->style = APS_EnvTag;
        reg_usage->has_env = true;
        break;
      case Opare:
        *env = instr->to;
        arg->style = APS_EnvTag;
        reg_usage->has_env = true;
        break;
      case Oargv:
        reg_usage->is_varargs_call = true;
        arg->style = APS_VarargsTag;
        break;
    }
  }
  if (reg_usage->has_env && reg_usage->is_varargs_call) {
    die("can't use env with varargs");
  }
  // During a varargs call, float arguments have to be duplicated to their
  // associated integer registers, so mark those as in-use too.
  if (reg_usage->is_varargs_call) {
    for (int i = 0; i < 4; ++i) {
      if (reg_usage->regs_passed[/*float*/ 1][i]) {
        reg_usage->regs_passed[/*int*/ 0][i] = true;
      }
    }
  }
}

static bool is_integer_type(int ty) {
  assert(ty >= 0 && ty < 4 && "expecting Kw Kl Ks Kd");
  return KBASE(ty) == 0;
}

static Ref register_for_arg(int cls, int counter) {
  assert(counter < 4);
  if (is_integer_type(cls)) {
    return TMP(amd64_winabi_rsave[counter]);
  } else {
    return TMP(XMM0 + counter);
  }
}

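// The first four entries of amd64_winabi_rsave are RCX, RDX, R8, R9, so e.g.
// register_for_arg(Kl, 2) is R8 and register_for_arg(Kd, 2) is XMM2.
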
static Ins* lower_call(Fn* func,
                       Blk* block,
                       Ins* call_instr,
                       ExtraAlloc** pextra_alloc) {
  // Call arguments are instructions. Walk backwards through them to find the
  // extent of the call+args that we need to process (the instruction just
  // before the first arg is also returned, so the caller can continue
  // processing from there).
  Ins* instr_past_args = call_instr - 1;
  for (; instr_past_args >= block->ins; --instr_past_args) {
    if (!isarg(instr_past_args->op)) {
      break;
    }
  }
  Ins* earliest_arg_instr = instr_past_args + 1;
  // We don't need an ArgClass for the call itself, so this is one less than
  // the total number of instructions we're dealing with.
  uint num_args = call_instr - earliest_arg_instr;
  ArgClass* arg_classes = alloc(num_args * sizeof(ArgClass));
  RegisterUsage reg_usage = {0};
  ArgClass ret_arg_class = {0};
  // Ocall's two arguments are the function to be called in arg[0] and, if the
  // function returns a non-basic type, a reference to the type of the return
  // in arg[1]. req checks if Refs are equal; `R` is 0.
  bool il_has_struct_return = !req(call_instr->arg[1], R);
  bool is_struct_return = false;
  if (il_has_struct_return) {
    Typ* ret_type = &typ[call_instr->arg[1].val];
    is_struct_return = type_is_by_copy(ret_type);
    if (is_struct_return) {
      assign_register_or_stack(&reg_usage, &ret_arg_class, /*is_float=*/false,
                               /*by_copy=*/true);
    }
    ret_arg_class.size = ret_type->size;
  }
  Ref env = R;
  classify_arguments(&reg_usage, earliest_arg_instr, call_instr, arg_classes,
                     &env);
  // We now know which arguments are on the stack and which are in registers,
  // so we can allocate the correct amount of space to stash the stack-located
  // ones into.
  uint stack_usage = 0;
  for (uint i = 0; i < num_args; ++i) {
    ArgClass* arg = &arg_classes[i];
    // stack_usage only accounts for values that didn't get a register. Large
    // struct copies are alloca'd separately, and so contribute at most the 8
    // bytes for their pointer to stack_usage here.
    if (arg->style == APS_InlineOnStack) {
      if (arg->align > 4) {
        err("win abi cannot pass alignments > 16");
      }
      stack_usage += arg->size;
    } else if (arg->style == APS_CopyAndPointerOnStack) {
      stack_usage += 8;
    }
  }
  stack_usage = ALIGN_UP(stack_usage, 16);
  // Note that here we're logically 'after' the call (due to emitting
  // instructions in reverse order), so we're doing a negative stack
  // allocation to clean up after the call.
  Ref stack_size_ref =
      getcon(-(int64_t)(stack_usage + SHADOW_SPACE_SIZE), func);
  emit(Osalloc, Kl, R, stack_size_ref, R);
  ExtraAlloc* return_pad = NULL;
  if (is_struct_return) {
    return_pad = alloc(sizeof(ExtraAlloc));
    Ref ret_pad_ref = newtmp("abi.ret_pad", Kl, func);
    return_pad->instr =
        (Ins){Oalloc8, Kl, ret_pad_ref, {getcon(ret_arg_class.size, func)}};
    return_pad->link = (*pextra_alloc);
    *pextra_alloc = return_pad;
    reg_usage.rax_returned = true;
    emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
  } else {
    if (il_has_struct_return) {
      // The IL specified a struct return, but as far as the calling
      // convention is concerned it's not actually returned by pointer. We
      // still need to store the return value into an alloca, because
      // subsequent IL will be treating the function return as a pointer.
      ExtraAlloc* return_copy = alloc(sizeof(ExtraAlloc));
      return_copy->instr =
          (Ins){Oalloc8, Kl, call_instr->to, {getcon(8, func)}};
      return_copy->link = (*pextra_alloc);
      *pextra_alloc = return_copy;
      Ref copy = newtmp("abi.copy", Kl, func);
      emit(Ostorel, 0, R, copy, call_instr->to);
      emit(Ocopy, Kl, copy, TMP(RAX), R);
      reg_usage.rax_returned = true;
    } else if (is_integer_type(call_instr->cls)) {
      // Only a basic type returned from the call: integer.
      emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
      reg_usage.rax_returned = true;
    } else {
      // Basic type, floating point.
      emit(Ocopy, call_instr->cls, call_instr->to, TMP(XMM0), R);
      reg_usage.xmm0_returned = true;
    }
  }
  // Emit the actual call instruction. There's no 'to' value by this point
  // because we've lowered it into register manipulation (that's the `R`);
  // arg[0] of the call is the function, and arg[1] is the register usage,
  // encoded in the layout documented above (copied from SysV).
  emit(Ocall, call_instr->cls, R, call_instr->arg[0],
       CALL(register_usage_to_call_arg_value(reg_usage)));
  if (!req(R, env)) {
    // If there's an env arg to be passed, it gets stashed in RAX.
    emit(Ocopy, Kl, TMP(RAX), env, R);
  }
  if (reg_usage.is_varargs_call) {
    // Any float arguments need to be duplicated to the corresponding integer
    // registers. The calling convention requires this for varargs (and
    // unprototyped) calls so that the callee can dump the register arguments
    // to shadow space without knowing their types.
#define DUP_IF_USED(index, floatreg, intreg)        \
  if (reg_usage.regs_passed[/*float*/ 1][index]) {  \
    emit(Ocast, Kl, TMP(intreg), TMP(floatreg), R); \
  }
    DUP_IF_USED(0, XMM0, RCX);
    DUP_IF_USED(1, XMM1, RDX);
    DUP_IF_USED(2, XMM2, R8);
    DUP_IF_USED(3, XMM3, R9);
#undef DUP_IF_USED
  }
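  // E.g. for a varargs call passing a double as the second argument, the
  // value sits in xmm1 and the Ocast above also materializes its bits in rdx.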
  int reg_counter = 0;
  if (is_struct_return) {
    Ref first_reg = register_for_arg(Kl, reg_counter++);
    emit(Ocopy, Kl, first_reg, return_pad->instr.to, R);
  }
  // This is where we actually do the load of values into registers or into
  // stack slots.
  Ref arg_stack_slots = newtmp("abi.args", Kl, func);
  uint slot_offset = SHADOW_SPACE_SIZE;
  ArgClass* arg = arg_classes;
  for (Ins* instr = earliest_arg_instr; instr != call_instr; ++instr, ++arg) {
    switch (arg->style) {
      case APS_Register: {
        Ref into = register_for_arg(arg->cls, reg_counter++);
        if (instr->op == Oargc) {
          // This is a small struct being passed by value. The value in the
          // instruction in this case is a pointer, so it needs to be loaded
          // into the register.
          emit(Oload, arg->cls, into, instr->arg[1], R);
        } else {
          // Otherwise, a normal value passed in a register.
          emit(Ocopy, instr->cls, into, instr->arg[0], R);
        }
        break;
      }
      case APS_InlineOnStack: {
        Ref slot = newtmp("abi.off", Kl, func);
        if (instr->op == Oargc) {
          // A small struct that isn't passed by copy, but the value in the
          // instruction is a pointer, so we need to copy the struct into the
          // stack slot. (And, remember that these are emitted backwards, so
          // store, then load.)
          Ref smalltmp = newtmp("abi.smalltmp", arg->cls, func);
          emit(Ostorel, 0, R, smalltmp, slot);
          emit(Oload, arg->cls, smalltmp, instr->arg[1], R);
        } else {
          // Stash the value into the stack slot.
          emit(Ostorel, 0, R, instr->arg[0], slot);
        }
        emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
        slot_offset += arg->size;
        break;
      }
      case APS_CopyAndPointerInRegister:
      case APS_CopyAndPointerOnStack: {
        // Alloca a space to copy into, and blit the value from the instr to
        // the copied location.
        ExtraAlloc* arg_copy = alloc(sizeof(ExtraAlloc));
        Ref copy_ref = newtmp("abi.copy", Kl, func);
        arg_copy->instr =
            (Ins){Oalloc8, Kl, copy_ref, {getcon(arg->size, func)}};
        arg_copy->link = (*pextra_alloc);
        *pextra_alloc = arg_copy;
        emit(Oblit1, 0, R, INT(arg->size), R);
        emit(Oblit0, 0, R, instr->arg[1], copy_ref);
        // Now load the pointer into the correct register or stack slot.
        if (arg->style == APS_CopyAndPointerInRegister) {
          Ref into = register_for_arg(arg->cls, reg_counter++);
          emit(Ocopy, Kl, into, copy_ref, R);
        } else {
          assert(arg->style == APS_CopyAndPointerOnStack);
          Ref slot = newtmp("abi.off", Kl, func);
          emit(Ostorel, 0, R, copy_ref, slot);
          emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
          slot_offset += 8;
        }
        break;
      }
      case APS_EnvTag:
      case APS_VarargsTag:
        // Nothing to do here; see right before the call for the register
        // duplication.
        break;
      case APS_Invalid:
        die("unreachable");
    }
  }
  if (stack_usage) {
    // The last (first in call order) thing we do is allocate the stack space
    // we're going to fill with temporaries; the shadow space is folded into
    // the same salloc.
    emit(Osalloc, Kl, arg_stack_slots,
         getcon(stack_usage + SHADOW_SPACE_SIZE, func), R);
  } else {
    // When no stack space is needed for temporaries, emit the shadow space
    // allocation separately (not storing into a reference) so that it doesn't
    // get removed later for being useless.
    emit(Osalloc, Kl, R, getcon(SHADOW_SPACE_SIZE, func), R);
  }
  return instr_past_args;
}

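// Since everything above is emitted in reverse, the lowering reads, in
// execution order: salloc of shadow space plus stack args; stores/copies of
// the arguments; the optional env copy into rax and xmm-to-gpr duplications;
// the call itself; the copy of the result out of rax/xmm0; and finally a
// negative salloc to clean up.
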
static void lower_block_return(Fn* func, Blk* block) {
  int jmp_type = block->jmp.type;
  if (!isret(jmp_type) || jmp_type == Jret0) {
    return;
  }
  // Save the argument, and set the block to be a void return, because once
  // it's lowered it's handled by the register/stack manipulation.
  Ref ret_arg = block->jmp.arg;
  block->jmp.type = Jret0;
  RegisterUsage reg_usage = {0};
  if (jmp_type == Jretc) {
    Typ* type = &typ[func->retty];
    if (type_is_by_copy(type)) {
      assert(rtype(func->retr) == RTmp);
      emit(Ocopy, Kl, TMP(RAX), func->retr, R);
      emit(Oblit1, 0, R, INT(type->size), R);
      emit(Oblit0, 0, R, ret_arg, func->retr);
    } else {
      emit(Oload, Kl, TMP(RAX), ret_arg, R);
    }
    reg_usage.rax_returned = true;
  } else {
    int k = jmp_type - Jretw;
    if (is_integer_type(k)) {
      emit(Ocopy, k, TMP(RAX), ret_arg, R);
      reg_usage.rax_returned = true;
    } else {
      emit(Ocopy, k, TMP(XMM0), ret_arg, R);
      reg_usage.xmm0_returned = true;
    }
  }
  block->jmp.arg = CALL(register_usage_to_call_arg_value(reg_usage));
}

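// So, for instance, a block returning a double ends up as a copy of the value
// into xmm0 followed by a bare ret, with jmp.arg set to CALL(0x2) (bit 1:
// xmm0 returned).
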
static void lower_vastart(Fn* func,
                          RegisterUsage* param_reg_usage,
                          Ref valist) {
  assert(func->vararg);
  // In varargs functions:
  // 1. the int registers are already dumped to the shadow stack space;
  // 2. any parameters passed in floating point registers have been duplicated
  //    to the integer registers;
  // 3. we ensure (later) that for varargs functions we're always using an rbp
  //    frame pointer.
  // So, the ... argument is just indexed past rbp by the number of named
  // values that were actually passed.
  Ref offset = newtmp("abi.vastart", Kl, func);
  emit(Ostorel, 0, R, offset, valist);
  // *8 for sizeof(u64), +16 because the return address and rbp have been
  // pushed by the time we get to the body of the function.
  emit(Oadd, Kl, offset, TMP(RBP),
       getcon(param_reg_usage->num_named_args_passed * 8 + 16, func));
}

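// E.g. in a function with one named parameter, va_start stores rbp+24 into
// the va_list: 16 bytes for the pushed return address and rbp, plus one
// 8-byte slot for the named parameter.
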
static void lower_vaarg(Fn* func, Ins* vaarg_instr) {
  // va_list is just a void** on win x64, so load the pointer, then load the
  // argument from that pointer, then increment the pointer to the next arg.
  // (All emitted backwards as usual.)
  Ref inc = newtmp("abi.vaarg.inc", Kl, func);
  Ref ptr = newtmp("abi.vaarg.ptr", Kl, func);
  emit(Ostorel, 0, R, inc, vaarg_instr->arg[0]);
  emit(Oadd, Kl, inc, ptr, getcon(8, func));
  emit(Oload, vaarg_instr->cls, vaarg_instr->to, ptr, R);
  emit(Oload, Kl, ptr, vaarg_instr->arg[0], R);
}

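// In execution order that is: ptr = load valist; result = load ptr;
// inc = ptr + 8; store inc back into valist.
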
static void lower_args_for_block(Fn* func,
                                 Blk* block,
                                 RegisterUsage* param_reg_usage,
                                 ExtraAlloc** pextra_alloc) {
  // curi is the global temporary buffer used by emit; it is reset to the end
  // and predecremented when adding to it.
  curi = &insb[NIns];
  lower_block_return(func, block);
  if (block->nins) {
    // Work backwards through the instructions, either copying them unchanged,
    // or modifying them as necessary.
    for (Ins* instr = &block->ins[block->nins - 1]; instr >= block->ins;) {
      switch (instr->op) {
        case Ocall:
          instr = lower_call(func, block, instr, pextra_alloc);
          break;
        case Ovastart:
          lower_vastart(func, param_reg_usage, instr->arg[0]);
          --instr;
          break;
        case Ovaarg:
          lower_vaarg(func, instr);
          --instr;
          break;
        case Oarg:
        case Oargc:
          die("unreachable");
        default:
          emiti(*instr);
          --instr;
          break;
      }
    }
  }
  // The start block is processed last; add any allocas that the other blocks
  // needed.
  bool is_start_block = block == func->start;
  if (is_start_block) {
    for (ExtraAlloc* ea = *pextra_alloc; ea; ea = ea->link) {
      emiti(ea->instr);
    }
  }
  // emit/emiti add instructions from the end to the beginning of the
  // temporary global buffer; dup the final version into the block's storage.
  block->nins = &insb[NIns] - curi;
  idup(block, curi, block->nins);
}

static Ins* find_end_of_func_parameters(Blk* start_block) {
  Ins* i;
  for (i = start_block->ins; i < &start_block->ins[start_block->nins]; ++i) {
    if (!ispar(i->op)) {
      break;
    }
  }
  return i;
}

// Copy from registers/stack into values.
static RegisterUsage lower_func_parameters(Fn* func) {
  Blk* start_block = func->start;
  Ins* start_of_params = start_block->ins;
  // This range is half-open, so end_of_params points past the last Opar.
  Ins* end_of_params = find_end_of_func_parameters(start_block);
  size_t num_params = end_of_params - start_of_params;
  ArgClass* arg_classes = alloc(num_params * sizeof(ArgClass));
  ArgClass arg_ret = {0};
  // curi is the global temporary buffer used by emit; it is reset to the end
  // and predecremented when adding to it.
  curi = &insb[NIns];
  int reg_counter = 0;
  RegisterUsage reg_usage = {0};
  if (func->retty >= 0) {
    bool by_copy = type_is_by_copy(&typ[func->retty]);
    if (by_copy) {
      assign_register_or_stack(&reg_usage, &arg_ret, /*is_float=*/false,
                               by_copy);
      // The caller passed the return pad pointer in RCX; save it for use in
      // lower_block_return.
      Ref ret_ref = newtmp("abi.ret", Kl, func);
      emit(Ocopy, Kl, ret_ref, TMP(RCX), R);
      func->retr = ret_ref;
      ++reg_counter;
    }
  }
  582. }
  583. Ref env = R;
  584. classify_arguments(&reg_usage, start_of_params, end_of_params, arg_classes,
  585. &env);
  586. func->reg = amd64_winabi_argregs(
  587. CALL(register_usage_to_call_arg_value(reg_usage)), NULL);
  588. // Copy from the registers or stack slots into the named parameters. Depending
  589. // on how they're passed, they either need to be copied or loaded.
  590. ArgClass* arg = arg_classes;
  591. uint slot_offset = SHADOW_SPACE_SIZE / 4 + 4;
  for (Ins* instr = start_of_params; instr < end_of_params; ++instr, ++arg) {
    switch (arg->style) {
      case APS_Register: {
        Ref from = register_for_arg(arg->cls, reg_counter++);
        // If it's a struct at the IL level, we need to copy the register into
        // an alloca so we have something to point at (same for
        // APS_InlineOnStack below).
        if (instr->op == Oparc) {
          arg->ref = newtmp("abi", Kl, func);
          emit(Ostorel, 0, R, arg->ref, instr->to);
          emit(Ocopy, instr->cls, arg->ref, from, R);
          emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
        } else {
          emit(Ocopy, instr->cls, instr->to, from, R);
        }
        break;
      }
      case APS_InlineOnStack:
        if (instr->op == Oparc) {
          arg->ref = newtmp("abi", Kl, func);
          emit(Ostorel, 0, R, arg->ref, instr->to);
          emit(Ocopy, instr->cls, arg->ref, SLOT(-slot_offset), R);
          emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
        } else {
          emit(Ocopy, Kl, instr->to, SLOT(-slot_offset), R);
        }
        slot_offset += 2;
        break;
      case APS_CopyAndPointerOnStack:
        emit(Oload, Kl, instr->to, SLOT(-slot_offset), R);
        slot_offset += 2;
        break;
      case APS_CopyAndPointerInRegister: {
        // Because this has to be a copy (that we own), it is sufficient to
        // just copy the register to the target.
        Ref from = register_for_arg(Kl, reg_counter++);
        emit(Ocopy, Kl, instr->to, from, R);
        break;
      }
      case APS_EnvTag:
        break;
      case APS_VarargsTag:
      case APS_Invalid:
        die("unreachable");
    }
  }
  // If there was an `env`, it was passed in RAX, so copy it into the env ref.
  if (!req(R, env)) {
    emit(Ocopy, Kl, env, TMP(RAX), R);
  }
  // Splice the newly emitted instructions in ahead of the rest of the start
  // block (the instructions that followed the parameters).
  int num_created_instrs = &insb[NIns] - curi;
  int num_other_after_instrs = (int)(start_block->nins - num_params);
  int new_total_instrs = num_other_after_instrs + num_created_instrs;
  Ins* new_instrs = vnew(new_total_instrs, sizeof(Ins), PFn);
  Ins* instr_p = icpy(new_instrs, curi, num_created_instrs);
  icpy(instr_p, end_of_params, num_other_after_instrs);
  start_block->nins = new_total_instrs;
  start_block->ins = new_instrs;
  return reg_usage;
}

// The main job of this function is to lower generic instructions into the
// specific details of how arguments are passed and parameters are interpreted
// for win x64. A useful reference is
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention .
//
// Some of the major differences from SysV if you're comparing the code
// (non-exhaustive):
// - only 4 int and 4 float regs are used;
// - when an int register is assigned a value, its associated float register
//   is left unused (and vice versa), i.e. there's only one counter as you
//   assign arguments to registers;
// - any structs that aren't 1/2/4/8 bytes in size are passed by pointer, not
//   by copying them onto the stack. So e.g. if you pass something like
//   `struct { void*; int64_t }` by value, it first needs to be copied to
//   another alloca (in order to maintain value semantics at the language
//   level), and then the pointer to that copy is treated as a regular integer
//   argument (which may itself *also* be copied to the stack in the case
//   that there's no integer register remaining);
// - when calling a varargs function, floating point values must be duplicated
//   to their integer registers. Along with the above restrictions, this makes
//   varargs handling simpler for the callee than on SysV.
void amd64_winabi_abi(Fn* func) {
  // The first thing to do is lower this function's incoming parameters.
  RegisterUsage param_reg_usage = lower_func_parameters(func);
  // The second, larger part of the job: walk all blocks and rewrite returns,
  // calls, and varargs handling into their win x64 specific versions. Any
  // other instructions are just passed through unchanged by using `emiti`.
  //
  // Skip over the entry block and do it at the end, so that our later
  // modifications can add allocations to the start block. In particular, we
  // need to add stack allocas for copies when structs are passed or returned
  // by value.
  ExtraAlloc* extra_alloc = NULL;
  for (Blk* block = func->start->link; block; block = block->link) {
    lower_args_for_block(func, block, &param_reg_usage, &extra_alloc);
  }
  lower_args_for_block(func, func->start, &param_reg_usage, &extra_alloc);
  if (debug['A']) {
    fprintf(stderr, "\n> After ABI lowering:\n");
    printfn(func, stderr);
  }
}