cgcpu.pas 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. This unit implements the code generator for the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,cgutils,cgobj,cgx86,
  22. aasmbase,aasmtai,aasmdata,aasmcpu,
  23. cpubase,parabase,
  24. symdef,
  25. symconst,rgx86,procinfo;
  26. type
  { Code generator class for x86-64; extends the shared x86 generator tcgx86. }
  tcgx86_64 = class(tcgx86)
    { creates the integer, MM and FPU register allocators }
    procedure init_register_allocators;override;

    { FPU loads/stores; both adjust 128 bit sizes before calling the inherited code }
    procedure a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const cgpara: TCGPara); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

    { procedure prologue / epilogue }
    procedure g_proc_entry(list : TAsmList;localsize:longint; nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;

    procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;

    { saving/restoring of registers; skipped when use_push is true }
    procedure g_save_registers(list: TAsmList);override;
    procedure g_restore_registers(list: TAsmList);override;

    { raw data transfers between integer and MM registers (MOVD/MOVQ) }
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;

    { true if the Microsoft ABI applies to the current procedure (or target) }
    function use_ms_abi: boolean;
  private
    { true if registers are saved with PUSH in the prologue }
    function use_push: boolean;
    { number of stack bytes needed for the XMM registers saved by the prologue }
    function saved_xmm_reg_size: longint;
  end;
  43. procedure create_codegen;
  44. implementation
  45. uses
  46. globtype,globals,verbose,systems,cutils,cclasses,
  47. symtable,paramgr,cpupi,
  48. rgcpu,ncgutil;
  { Creates the register allocators used by this code generator.
    The order of the integer registers handed to the allocator depends on
    use_ms_abi; with the MS ABI, RBP is additionally made allocatable when
    cs_userbp is enabled and the current procedure uses the stack pointer as
    its frame pointer. }
  procedure Tcgx86_64.init_register_allocators;
    var
      rbp_allocatable: boolean;
    begin
      inherited init_register_allocators;
      if not use_ms_abi then
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
           RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],
          first_int_imreg,[])
      else
        begin
          { current_procinfo may be unassigned here, hence the explicit check
            before its framepointer is inspected }
          rbp_allocatable:=(cs_userbp in current_settings.optimizerswitches) and
            assigned(current_procinfo) and
            (current_procinfo.framepointer=NR_STACK_POINTER_REG);
          if rbp_allocatable then
            rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
              [RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
               RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP],
              first_int_imreg,[])
          else
            rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
              [RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
               RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],
              first_int_imreg,[]);
        end;

      { all sixteen XMM registers are available to the MM allocator }
      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,
        [RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
         RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],
        first_mm_imreg,[]);
      rgfpu:=Trgx86fpu.create;
    end;
  { Loads an FPU value from ref into the parameter location cgpara.
    A record containing an extended value is returned on the x87 stack; in
    that case size arrives as OS_F128 (if not packed), while
    cgpara.location^.size holds the proper size, so that one is used instead.
    In the future cgpara.location^.size should probably always be used, but
    that should only be tested/done after 2.8 is branched. }
  procedure tcgx86_64.a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const cgpara: TCGPara);
    begin
      if (size=OS_128) or (size=OS_F128) then
        size:=cgpara.location^.size;
      inherited;
    end;
  { Stores an FPU register to memory.
    Counterpart of a_loadfpu_ref_cgpara on the callee side: when the value is
    moved from the fpu register into a memory location, a 128 bit target size
    is replaced by OS_F80 before the inherited code runs. }
  procedure tcgx86_64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    begin
      if (tosize=OS_128) or (tosize=OS_F128) then
        tosize:=OS_F80;
      inherited;
    end;
  { Returns true if the current procedure uses the stack pointer as frame
    pointer, or if it is an exception filter. }
  function tcgx86_64.use_push: boolean;
    begin
      if current_procinfo.framepointer=NR_STACK_POINTER_REG then
        result:=true
      else
        result:=current_procinfo.procdef.proctypeoption=potype_exceptfilter;
    end;
  { Returns the number of bytes needed to save the MM registers that have to
    be preserved for the current calling convention and are used in the
    current procedure. The result is 0 on targets other than x86_64-win64 and
    when no MM registers are used. }
  function tcgx86_64.saved_xmm_reg_size: longint;
    var
      idx: longint;
      callee_saved_mm: tcpuregisterarray;
    begin
      result:=0;
      if target_info.system<>system_x86_64_win64 then
        exit;
      if not uses_registers(R_MMREGISTER) then
        exit;
      callee_saved_mm:=paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption);
      { one vector sized slot per register that is actually used }
      for idx:=low(callee_saved_mm) to high(callee_saved_mm) do
        if callee_saved_mm[idx] in rg[R_MMREGISTER].used_in_proc then
          inc(result,tcgsize2size[OS_VECTOR]);
    end;
  { Generates the procedure prologue into list.
      localsize    - size of the local area; may be adjusted here for saved
                     XMM registers and stack alignment
      nostackframe - if true, no frame setup and no stack allocation is emitted
    If the procedure has unwind info (pi_has_unwind_info), the SEH directives
    describing the prologue are generated as well. }
  procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
    var
      anchor: tlinkedlistitem;
      proc_directive: tai_seh_directive;
      idx: integer;
      save_ref: treference;
      unwind_list: TAsmList;
      rsp_bias: longint;
      endprologue_in_body: boolean;
      misalign: longint;
      xmm_bytes: longint;
      need_alloc: boolean;
      callee_saved_int,
      callee_saved_mm: tcpuregisterarray;

    { emits a PUSH of reg; on win64 the matching SEH directive is added and
      the procedure is flagged as having unwind info }
    procedure emit_push(reg: tregister);
      begin
        list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
        if target_info.system<>system_x86_64_win64 then
          exit;
        list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
        include(current_procinfo.flags,pi_has_unwind_info);
      end;

    { pushes every register from callee_saved_int that is used in the
      procedure and is not volatile, tracking the stack misalignment }
    procedure push_callee_saved;
      var
        k: longint;
        nonvolatile_used: tcpuregisterset;
      begin
        nonvolatile_used:=rg[R_INTREGISTER].used_in_proc-
          paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption);
        for k:=low(callee_saved_int) to high(callee_saved_int) do
          begin
            if not (callee_saved_int[k] in nonvolatile_used) then
              continue;
            inc(misalign,sizeof(pint));
            emit_push(newreg(R_INTREGISTER,callee_saved_int[k],R_SUBWHOLE));
          end;
      end;

    begin
      callee_saved_int:=paramanager.get_saved_registers_int(current_procinfo.procdef.proccalloption);
      callee_saved_mm:=paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption);
      { remember where the prologue starts, the SEH proc directive is
        inserted after this item later on }
      anchor:=list.last;
      { pi_has_unwind_info may already be set at this point if there are
        SEH directives in assembler body. In this case, .seh_endprologue
        is expected to be one of those directives, and not generated here. }
      endprologue_in_body:=pi_has_unwind_info in current_procinfo.flags;

      if not nostackframe then
        begin
          { the return address is already on the stack }
          misalign:=sizeof(pint);
          list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { save old framepointer: push <frame_pointer> }
              inc(misalign,sizeof(pint));
              emit_push(NR_FRAME_POINTER_REG);
              { Return address and FP are both on stack }
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
              current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
              if current_procinfo.procdef.proctypeoption=potype_exceptfilter then
                begin
                  push_callee_saved;
                  gen_load_frame_for_exceptfilter(list);
                  { Need only as much stack space as necessary to do the calls.
                    Exception filters don't have own local vars, and temps are 'mapped'
                    to the parent procedure.
                    maxpushedparasize is already aligned at least on x86_64. }
                  localsize:=current_procinfo.maxpushedparasize;
                end
              else
                list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG));
              current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
              {
                TODO: current framepointer handling is not compatible with Win64 at all:
                Win64 expects FP to point to the top or into the middle of local area.
                In FPC it points to the bottom, making it impossible to generate
                UWOP_SET_FPREG unwind code if local area is > 240 bytes.
                So for now pretend we never have a framepointer.
              }
            end
          else
            begin
              { no frame pointer: only the callee saved registers are pushed }
              push_callee_saved;
              CGmessage(cg_d_stackframe_omited);
            end;

          { reserve room for the XMM registers above the (aligned) locals }
          xmm_bytes:=saved_xmm_reg_size;
          if use_push and (xmm_bytes<>0) then
            begin
              localsize:=align(localsize,target_info.stackalign)+xmm_bytes;
              reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
                localsize-xmm_bytes,ctempposinvalid,tcgsize2size[OS_VECTOR],[]);
            end;

          { allocate stackframe space }
          need_alloc:=(localsize<>0) or
            ((target_info.stackalign>sizeof(pint)) and
             (misalign<>0) and
             ((pi_do_call in current_procinfo.flags) or
              (po_assembler in current_procinfo.procdef.procoptions)));
          if need_alloc then
            begin
              if target_info.stackalign>sizeof(pint) then
                localsize:=align(localsize+misalign,target_info.stackalign)-misalign;
              g_stackpointer_alloc(list,localsize);
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
              current_procinfo.final_localsize:=localsize;

              if target_info.system=system_x86_64_win64 then
                begin
                  if localsize<>0 then
                    list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
                  include(current_procinfo.flags,pi_has_unwind_info);
                  if use_push and (xmm_bytes<>0) then
                    begin
                      { store the used XMM registers into consecutive slots
                        starting at save_regs_ref }
                      save_ref:=current_procinfo.save_regs_ref;
                      for idx:=low(callee_saved_mm) to high(callee_saved_mm) do
                        if callee_saved_mm[idx] in rg[R_MMREGISTER].used_in_proc then
                          begin
                            a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,
                              newreg(R_MMREGISTER,callee_saved_mm[idx],R_SUBMMWHOLE),save_ref,nil);
                            inc(save_ref.offset,tcgsize2size[OS_VECTOR]);
                          end;
                    end;
                end;
            end;
        end;

      { nothing more to do without unwind info }
      if not (pi_has_unwind_info in current_procinfo.flags) then
        exit;

      { Generate unwind data for x86_64-win64 }
      proc_directive:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
      if not assigned(anchor) then
        list.insert(proc_directive)
      else
        list.insertafter(proc_directive,anchor);
      { the directive creates another section }
      inc(list.section_count);

      unwind_list:=TAsmList.Create;
      { We need to record positive offsets from RSP; if registers are saved
        at negative offsets from RBP we need to account for it. }
      if use_push then
        rsp_bias:=0
      else
        rsp_bias:=current_procinfo.final_localsize;

      { There's no need to describe position of register saves precisely;
        since registers are not modified before they are saved, and saves do not
        change RSP, 'logically' all saves can happen at the end of prologue. }
      save_ref:=current_procinfo.save_regs_ref;
      if not use_push then
        for idx:=low(callee_saved_int) to high(callee_saved_int) do
          if callee_saved_int[idx] in rg[R_INTREGISTER].used_in_proc then
            begin
              unwind_list.concat(cai_seh_directive.create_reg_offset(ash_savereg,
                newreg(R_INTREGISTER,callee_saved_int[idx],R_SUBWHOLE),
                save_ref.offset+rsp_bias));
              inc(save_ref.offset,sizeof(aint));
            end;

      if uses_registers(R_MMREGISTER) then
        begin
          { round the offset up to a multiple of the vector size }
          if (save_ref.offset mod tcgsize2size[OS_VECTOR])<>0 then
            inc(save_ref.offset,tcgsize2size[OS_VECTOR]-(save_ref.offset mod tcgsize2size[OS_VECTOR]));
          for idx:=low(callee_saved_mm) to high(callee_saved_mm) do
            if callee_saved_mm[idx] in rg[R_MMREGISTER].used_in_proc then
              begin
                unwind_list.concat(cai_seh_directive.create_reg_offset(ash_savexmm,
                  newreg(R_MMREGISTER,callee_saved_mm[idx],R_SUBMMWHOLE),
                  save_ref.offset+rsp_bias));
                inc(save_ref.offset,tcgsize2size[OS_VECTOR]);
              end;
        end;

      if not endprologue_in_body then
        unwind_list.concat(cai_seh_directive.create(ash_endprologue));

      { place the directives after the recorded end of prologue if there is
        one, otherwise append them to list }
      if assigned(current_procinfo.endprologue_ai) then
        current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,unwind_list)
      else
        list.concatlist(unwind_list);
      unwind_list.free;
    end;
  { Generates the procedure epilogue into list: removes the stack frame
    (unless nostackframe is set), emits RET and, if the procedure has unwind
    info, dumps the scopes and closes the SEH procedure. }
  procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);

    { adds bytes to the stack pointer }
    procedure release_stack(bytes : tcgint);
      var
        sp_ref : treference;
      begin
        reference_reset_base(sp_ref,NR_STACK_POINTER_REG,bytes,ctempposinvalid,0,[]);
        { normally, lea is a better choice than an add }
        list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],sp_ref,NR_STACK_POINTER_REG));
      end;

    var
      slot : treference;
      xmm : tregister;
      idx : longint;
      callee_saved_mm: tcpuregisterarray;
    begin
      callee_saved_mm:=paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption);

      { Prevent return address from a possible call from ending up in the epilogue }
      { (restoring registers happens before epilogue, providing necessary padding) }
      if (pi_has_unwind_info in current_procinfo.flags) and
         (pi_do_call in current_procinfo.flags) and
         not (pi_has_saved_regs in current_procinfo.flags) then
        list.concat(Taicpu.op_none(A_NOP));

      { remove stackframe }
      if not nostackframe then
        begin
          if use_push then
            begin
              if saved_xmm_reg_size<>0 then
                begin
                  { reload the XMM registers from consecutive slots starting
                    at save_regs_ref }
                  slot:=current_procinfo.save_regs_ref;
                  for idx:=low(callee_saved_mm) to high(callee_saved_mm) do
                    if callee_saved_mm[idx] in rg[R_MMREGISTER].used_in_proc then
                      begin
                        { Allocate register so the optimizer does not remove the load }
                        xmm:=newreg(R_MMREGISTER,callee_saved_mm[idx],R_SUBMMWHOLE);
                        a_reg_alloc(list,xmm);
                        a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,slot,xmm,nil);
                        inc(slot.offset,tcgsize2size[OS_VECTOR]);
                      end;
                end;

              if current_procinfo.final_localsize<>0 then
                release_stack(current_procinfo.final_localsize);
              internal_restore_regs(list,true);
              if current_procinfo.procdef.proctypeoption=potype_exceptfilter then
                list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
            end
          else if target_info.system=system_x86_64_win64 then
            begin
              { Comply with Win64 unwinding mechanism, which only recognizes
                'add $constant,%rsp' and 'lea offset(FPREG),%rsp' as belonging to
                the function epilog.
                Neither 'leave' nor even 'mov %FPREG,%rsp' are allowed. }
              reference_reset_base(slot,current_procinfo.framepointer,0,ctempposinvalid,sizeof(pint),[]);
              list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],slot,NR_STACK_POINTER_REG));
              list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],current_procinfo.framepointer));
            end
          else
            generate_leave(list);

          list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
        end;

      list.concat(Taicpu.Op_none(A_RET,S_NO));

      if pi_has_unwind_info in current_procinfo.flags then
        begin
          tcpuprocinfo(current_procinfo).dump_scopes(list);
          list.concat(cai_seh_directive.create(ash_endproc));
        end;
    end;
  { Saves registers through the inherited implementation, except when
    use_push is true. }
  procedure tcgx86_64.g_save_registers(list: TAsmList);
    begin
      if use_push then
        exit;
      inherited g_save_registers(list);
    end;
  { Restores registers through the inherited implementation, except when
    use_push is true. }
  procedure tcgx86_64.g_restore_registers(list: TAsmList);
    begin
      if use_push then
        exit;
      inherited g_restore_registers(list);
    end;
  { Generates a local unwind to label l.
    On x86_64-win64 this is a call to _FPC_local_unwind with the stack pointer
    as first and the address of l as second parameter; on all other targets
    the inherited implementation is used. }
  procedure tcgx86_64.g_local_unwind(list: TAsmList; l: TAsmLabel);
    var
      frame_para,target_para: tcgpara;
      label_ref: treference;
      unwind_proc: tprocdef;
    begin
      if target_info.system=system_x86_64_win64 then
        begin
          unwind_proc:=search_system_proc('_fpc_local_unwind');
          frame_para.init;
          target_para.init;
          paramanager.getintparaloc(list,unwind_proc,1,frame_para);
          paramanager.getintparaloc(list,unwind_proc,2,target_para);
          reference_reset_symbol(label_ref,l,0,1,[]);
          { TODO: using RSP is correct only while the stack is fixed!!
            (true now, but will change if/when allocating from stack is implemented) }
          a_load_reg_cgpara(list,OS_ADDR,NR_STACK_POINTER_REG,frame_para);
          a_loadaddr_ref_cgpara(list,label_ref,target_para);
          { parameter locations are released in reverse order before the call }
          paramanager.freecgpara(list,target_para);
          paramanager.freecgpara(list,frame_para);
          g_call(list,'_FPC_local_unwind');
          target_para.done;
          frame_para.done;
        end
      else
        inherited g_local_unwind(list,l);
    end;
  { Copies the contents of integer register intreg into MM register mmreg
    with MOVD (32 bit source) or MOVQ (64 bit source).
    Only raw data transfers are supported, no conversions: both sizes must
    have the same byte size, tosize must be OS_F32, OS_F64 or OS_M64, and a
    shuffle, if given, must be scalar. Anything else is an internal error. }
  procedure tcgx86_64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      mov_op: tasmop;
    begin
      if not ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (tosize in [OS_F32,OS_F64,OS_M64])) then
        internalerror(2009112505);

      if fromsize in [OS_32,OS_S32] then
        mov_op:=A_MOVD
      else if fromsize in [OS_64,OS_S64] then
        mov_op:=A_MOVQ
      else
        internalerror(2009112506);

      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112517);

      list.concat(taicpu.op_reg_reg(mov_op,S_NO,intreg,mmreg));
    end;
  { Copies the contents of MM register mmreg into integer register intreg
    with MOVD (32 bit target) or MOVQ (64 bit target).
    Only raw data transfers are supported, no conversions: both sizes must
    have the same byte size, fromsize must be OS_F32, OS_F64 or OS_M64, and a
    shuffle, if given, must be scalar. Anything else is an internal error. }
  procedure tcgx86_64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      mov_op: tasmop;
    begin
      if not ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (fromsize in [OS_F32,OS_F64,OS_M64])) then
        internalerror(2009112507);

      if tosize in [OS_32,OS_S32] then
        mov_op:=A_MOVD
      else if tosize in [OS_64,OS_S64] then
        mov_op:=A_MOVQ
      else
        internalerror(2009112408);

      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112515);

      list.concat(taicpu.op_reg_reg(mov_op,S_NO,mmreg,intreg));
    end;
  { Returns whether the Microsoft ABI is in effect: decided by the calling
    convention of the current procedure if there is one, otherwise by whether
    the target system is x86_64-win64. }
  function tcgx86_64.use_ms_abi: boolean;
    begin
      if not assigned(current_procinfo) then
        result:=target_info.system=system_x86_64_win64
      else
        result:=x86_64_use_ms_abi(current_procinfo.procdef.proccalloption);
    end;
  { Instantiates the code generators and stores them in the global variables
    cg and cg128. }
  procedure create_codegen;
    begin
      { x86-64 specific code generator }
      cg:=tcgx86_64.create;
      { generator stored in cg128 }
      cg128:=tcg128.create;
    end;
  447. end.