cgcpu.pas 22 KB


  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. This unit implements the code generator for the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,cgutils,cgobj,cgx86,
  22. aasmbase,aasmtai,aasmdata,aasmcpu,
  23. cpubase,parabase,
  24. symdef,
  25. symconst,rgx86,procinfo;
  26. type
  { Code generator for x86-64; specialises the shared x86 generator tcgx86. }
  tcgx86_64 = class(tcgx86)
    { register allocator set-up }
    procedure init_register_allocators;override;

    { x87 loads/stores with 128 bit size fix-ups }
    procedure a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const cgpara: TCGPara); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

    { procedure prologue / epilogue and unwinding }
    procedure g_proc_entry(list : TAsmList;localsize:longint; nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
    procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;

    { callee-saved register handling; skipped when the prologue pushes
      the registers itself (see use_push) }
    procedure g_save_registers(list: TAsmList);override;
    procedure g_restore_registers(list: TAsmList);override;

    { raw bit transfers between integer and MM registers }
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;

    { true when the Microsoft x64 ABI applies to the code being generated }
    function use_ms_abi: boolean;
  private
    { true when callee-saved registers are saved with push instructions }
    function use_push: boolean;
    { number of stack bytes needed to save the used callee-saved XMM registers }
    function saved_xmm_reg_size: longint;
  end;
  43. procedure create_codegen;
  44. implementation
  45. uses
  46. globtype,globals,verbose,systems,cutils,cclasses,
  47. symtable,paramgr,cpupi,
  48. rgcpu,ncgutil;
  { Creates the integer, MM and x87 register allocators and makes sure the
    global callee-saved register tables (saved_standard_registers and
    saved_mm_registers) match the ABI reported by use_ms_abi. }
  procedure Tcgx86_64.init_register_allocators;
    const
      { callee-saved integer registers, Microsoft x64 ABI }
      win64_saved_std_regs : array[0..7] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP);
      { callee-saved integer registers, every other ABI }
      others_saved_std_regs : array[0..4] of tsuperregister = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
      { callee-saved XMM registers, Microsoft x64 ABI only }
      win64_saved_xmm_regs : array[0..9] of tsuperregister = (RS_XMM6,RS_XMM7,
        RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
    var
      i : longint;
      ms_abi: boolean;
      expected_std_regs: longint;
    begin
      inherited init_register_allocators;

      ms_abi:=use_ms_abi;

      { The length of saved_standard_registers tells which ABI the tables
        were last filled for.  The expected length is taken from the tables
        themselves: a hand-maintained count previously said 7 for the
        8-entry Win64 table, so the tables were rebuilt on every call. }
      if ms_abi then
        expected_std_regs:=Length(win64_saved_std_regs)
      else
        expected_std_regs:=Length(others_saved_std_regs);

      if (length(saved_standard_registers)<>expected_std_regs) then
        begin
          if ms_abi then
            begin
              SetLength(saved_standard_registers,Length(win64_saved_std_regs));
              SetLength(saved_mm_registers,Length(win64_saved_xmm_regs));

              for i:=low(win64_saved_std_regs) to high(win64_saved_std_regs) do
                saved_standard_registers[i]:=win64_saved_std_regs[i];

              for i:=low(win64_saved_xmm_regs) to high(win64_saved_xmm_regs) do
                saved_mm_registers[i]:=win64_saved_xmm_regs[i];
            end
          else
            begin
              SetLength(saved_standard_registers,Length(others_saved_std_regs));
              SetLength(saved_mm_registers,0);

              for i:=low(others_saved_std_regs) to high(others_saved_std_regs) do
                saved_standard_registers[i]:=others_saved_std_regs[i];
            end;
        end;

      if ms_abi then
        begin
          { RBP is only handed to the allocator when the cs_userbp
            optimisation is enabled and the current procedure uses the
            stack pointer register as its frame pointer }
          if (cs_userbp in current_settings.optimizerswitches) and assigned(current_procinfo) and (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
            rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
              RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP],first_int_imreg,[])
          else
            rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
              RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
        end
      else
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
          RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);

      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
        RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
      rgfpu:=Trgx86fpu.create;
    end;
  { Loads an FPU value from ref into the parameter location cgpara.
    A 128 bit size (OS_128 or OS_F128) is replaced by the size recorded in
    cgpara.location^ before the inherited implementation is called.
    Per the original note this happens for a record containing an extended
    value that is returned on the x87 stack. }
  procedure tcgx86_64.a_loadfpu_ref_cgpara(list: TAsmList; size: tcgsize; const ref: treference; const cgpara: TCGPara);
    begin
      { In the future cgpara.location^.size should probably always be used,
        but that should only be tested/done after 2.8 is branched }
      if (size=OS_128) or (size=OS_F128) then
        size:=cgpara.location^.size;

      inherited a_loadfpu_ref_cgpara(list,size,ref,cgpara);
    end;
  { Stores the FPU register reg to the memory location ref.
    Callee-side counterpart of a_loadfpu_ref_cgpara: a 128 bit target size
    (OS_128 or OS_F128) is narrowed to OS_F80 before the inherited store. }
  procedure tcgx86_64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    begin
      if (tosize=OS_128) or (tosize=OS_F128) then
        tosize:=OS_F80;

      inherited a_loadfpu_reg_ref(list,fromsize,tosize,reg,ref);
    end;
  { Returns true when the current procedure uses the stack pointer register
    as its frame pointer, or when it is an exception filter. }
  function tcgx86_64.use_push: boolean;
    begin
      if current_procinfo.framepointer=NR_STACK_POINTER_REG then
        result:=true
      else
        result:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);
    end;
  { Returns the number of bytes needed to save every register listed in
    saved_mm_registers that the current procedure uses (one OS_VECTOR slot
    each).  The result is 0 when the target is not x86_64-win64 or when no
    MM register is used at all. }
  function tcgx86_64.saved_xmm_reg_size: longint;
    var
      idx: longint;
    begin
      result:=0;
      if (target_info.system=system_x86_64_win64) and
         uses_registers(R_MMREGISTER) then
        for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
          if (saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc) then
            inc(result,tcgsize2size[OS_VECTOR]);
    end;
  { Emits the procedure prologue into list: frame pointer set-up or
    register pushes, allocation of localsize bytes of stack (adjusted for
    alignment and XMM save space), CFI information and, when unwind info is
    required, the SEH directives describing the prologue.
    Nothing but SEH bookkeeping is generated when nostackframe is true. }
  procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
    var
      prev_tail: tlinkedlistitem;
      proc_directive: tai_seh_directive;
      idx: integer;
      saveref: treference;
      sehlist: TAsmList;
      rsp_bias: longint;
      endprologue_in_body: boolean;
      misalign: longint;
      xmm_bytes: longint;

    { Pushes reg; on x86_64-win64 the push is also described by an
      ash_pushreg directive and the procedure is flagged as having
      unwind info. }
    procedure push_one_reg(reg: tregister);
      begin
        list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
        if (target_info.system<>system_x86_64_win64) then
          exit;
        list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
        include(current_procinfo.flags,pi_has_unwind_info);
      end;

    { Pushes every register from saved_standard_registers that is used in
      the procedure and is not volatile for its calling convention; every
      push adds sizeof(pint) to the running stack misalignment. }
    procedure push_regs;
      var
        k: longint;
        to_save: tcpuregisterset;
      begin
        to_save:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption);
        for k:=low(saved_standard_registers) to high(saved_standard_registers) do
          begin
            if not (saved_standard_registers[k] in to_save) then
              continue;
            inc(misalign,sizeof(pint));
            push_one_reg(newreg(R_INTREGISTER,saved_standard_registers[k],R_SUBWHOLE));
          end;
      end;

    begin
      { the ash_proc directive is later inserted right after this item }
      prev_tail:=list.last;

      { pi_has_unwind_info can already be set here when the assembler body
        contains SEH directives.  .seh_endprologue is then expected to be
        among them and must not be generated a second time. }
      endprologue_in_body:=(pi_has_unwind_info in current_procinfo.flags);

      if not nostackframe then
        begin
          { the return address is already on the stack }
          misalign:=sizeof(pint);
          list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));

          if current_procinfo.framepointer=NR_STACK_POINTER_REG then
            begin
              { no separate frame pointer: only push the saved registers }
              push_regs;
              CGmessage(cg_d_stackframe_omited);
            end
          else
            begin
              { push <frame_pointer> }
              inc(misalign,sizeof(pint));
              push_one_reg(NR_FRAME_POINTER_REG);

              { return address and frame pointer are both on the stack now }
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
              current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));

              if current_procinfo.procdef.proctypeoption=potype_exceptfilter then
                begin
                  push_regs;
                  gen_load_frame_for_exceptfilter(list);
                  { Exception filters have no local variables of their own
                    and their temps are 'mapped' to the parent procedure,
                    so only the space needed for calls is reserved.
                    maxpushedparasize is already aligned, at least on
                    x86_64. }
                  localsize:=current_procinfo.maxpushedparasize;
                end
              else
                list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG));

              current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
              {
                TODO: the current frame pointer handling is not compatible
                with Win64 at all: Win64 expects FP to point to the top or
                into the middle of the local area, whereas in FPC it points
                to the bottom.  That makes it impossible to generate an
                UWOP_SET_FPREG unwind code if the local area is > 240 bytes,
                so for now pretend we never have a frame pointer.
              }
            end;

          { with pushed registers the XMM save area sits on top of the
            aligned locals and is addressed relative to the stack pointer }
          xmm_bytes:=saved_xmm_reg_size;
          if use_push and (xmm_bytes<>0) then
            begin
              localsize:=align(localsize,target_info.stackalign)+xmm_bytes;
              reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
                localsize-xmm_bytes,tcgsize2size[OS_VECTOR],[]);
            end;

          { allocate stackframe space }
          if (localsize<>0) or
             ((target_info.stackalign>sizeof(pint)) and
              (misalign<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              if target_info.stackalign>sizeof(pint) then
                localsize:=align(localsize+misalign,target_info.stackalign)-misalign;

              g_stackpointer_alloc(list,localsize);

              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));

              current_procinfo.final_localsize:=localsize;

              if (target_info.system=system_x86_64_win64) then
                begin
                  if localsize<>0 then
                    list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
                  include(current_procinfo.flags,pi_has_unwind_info);

                  { store the used callee-saved XMM registers into their
                    save area, one OS_VECTOR slot after another }
                  if use_push and (xmm_bytes<>0) then
                    begin
                      saveref:=current_procinfo.save_regs_ref;
                      for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
                        if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
                          begin
                            a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,newreg(R_MMREGISTER,saved_mm_registers[idx],R_SUBMMWHOLE),saveref,nil);
                            inc(saveref.offset,tcgsize2size[OS_VECTOR]);
                          end;
                    end;
                end;
            end;
        end;

      { everything below only produces unwind data }
      if not (pi_has_unwind_info in current_procinfo.flags) then
        exit;

      { Generate unwind data for x86_64-win64 }
      proc_directive:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
      if assigned(prev_tail) then
        list.insertafter(proc_directive,prev_tail)
      else
        list.insert(proc_directive);

      sehlist:=TAsmList.Create;

      { Offsets must be recorded as positive offsets from RSP; if registers
        are saved at negative offsets from RBP this has to be accounted
        for. }
      if use_push then
        rsp_bias:=0
      else
        rsp_bias:=current_procinfo.final_localsize;

      { The position of the register saves need not be described
        precisely: registers are not modified before they are saved and the
        saves do not change RSP, so 'logically' all saves can happen at the
        end of the prologue. }
      saveref:=current_procinfo.save_regs_ref;
      if (not use_push) then
        begin
          for idx:=low(saved_standard_registers) to high(saved_standard_registers) do
            if saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc then
              begin
                sehlist.concat(cai_seh_directive.create_reg_offset(ash_savereg,
                  newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE),
                  saveref.offset+rsp_bias));
                inc(saveref.offset,sizeof(aint));
              end;
        end;

      if uses_registers(R_MMREGISTER) then
        begin
          { round the offset up to the next multiple of the vector size }
          if (saveref.offset mod tcgsize2size[OS_VECTOR])<>0 then
            inc(saveref.offset,tcgsize2size[OS_VECTOR]-(saveref.offset mod tcgsize2size[OS_VECTOR]));

          for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
            begin
              if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
                begin
                  sehlist.concat(cai_seh_directive.create_reg_offset(ash_savexmm,
                    newreg(R_MMREGISTER,saved_mm_registers[idx],R_SUBMMWHOLE),
                    saveref.offset+rsp_bias));
                  inc(saveref.offset,tcgsize2size[OS_VECTOR]);
                end;
            end;
        end;

      if not endprologue_in_body then
        sehlist.concat(cai_seh_directive.create(ash_endprologue));

      { place the directives after the recorded end-of-prologue marker if
        there is one, otherwise append them to list }
      if assigned(current_procinfo.endprologue_ai) then
        current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,sehlist)
      else
        list.concatlist(sehlist);
      sehlist.free;
    end;
  { Emits the procedure epilogue into list: restores saved XMM registers,
    releases the stack frame, restores pushed registers / the frame
    pointer, emits RET and, when unwind info is present, the scope table
    and the ash_endproc directive. }
  procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
    var
      href : treference;
      xmmreg : tregister;
      idx : longint;

    { Adds a to the stack pointer by means of "lea a(%rsp),%rsp". }
    procedure increase_sp(a : tcgint);
      var
        spref : treference;
      begin
        reference_reset_base(spref,NR_STACK_POINTER_REG,a,0,[]);
        { normally, lea is a better choice than an add }
        list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],spref,NR_STACK_POINTER_REG));
      end;

    begin
      { Prevent the return address of a possible call from ending up in the
        epilogue.  When registers are restored, that code already sits
        before the epilogue and provides the necessary padding, so the NOP
        is only needed without saved registers. }
      if (pi_has_unwind_info in current_procinfo.flags) and
         (pi_do_call in current_procinfo.flags) and
         not (pi_has_saved_regs in current_procinfo.flags) then
        list.concat(Taicpu.op_none(A_NOP));

      { remove stackframe }
      if not nostackframe then
        begin
          if use_push then
            begin
              if (saved_xmm_reg_size<>0) then
                begin
                  href:=current_procinfo.save_regs_ref;
                  for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
                    if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
                      begin
                        { Allocate register so the optimizer does not remove the load }
                        xmmreg:=newreg(R_MMREGISTER,saved_mm_registers[idx],R_SUBMMWHOLE);
                        a_reg_alloc(list,xmmreg);
                        a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,href,xmmreg,nil);
                        inc(href.offset,tcgsize2size[OS_VECTOR]);
                      end;
                end;

              if (current_procinfo.final_localsize<>0) then
                increase_sp(current_procinfo.final_localsize);

              internal_restore_regs(list,true);

              { exception filters pushed the frame pointer in the prologue }
              if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
                list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
            end
          else if (target_info.system=system_x86_64_win64) then
            begin
              { The Win64 unwinding mechanism only recognizes
                'add $constant,%rsp' and 'lea offset(FPREG),%rsp' as part
                of the function epilog; neither 'leave' nor even
                'mov %FPREG,%rsp' are allowed. }
              reference_reset_base(href,current_procinfo.framepointer,0,sizeof(pint),[]);
              list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],href,NR_STACK_POINTER_REG));
              list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],current_procinfo.framepointer));
            end
          else
            generate_leave(list);

          list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
        end;

      list.concat(Taicpu.Op_none(A_RET,S_NO));

      if (pi_has_unwind_info in current_procinfo.flags) then
        begin
          tcpuprocinfo(current_procinfo).dump_scopes(list);
          list.concat(cai_seh_directive.create(ash_endproc));
        end;
    end;
  { Saves the callee-saved registers through the inherited implementation,
    unless use_push reports that the prologue pushes them itself. }
  procedure tcgx86_64.g_save_registers(list: TAsmList);
    begin
      if use_push then
        exit;
      inherited g_save_registers(list);
    end;
  { Restores the callee-saved registers through the inherited
    implementation, unless use_push reports that they were pushed (the
    epilogue then restores them itself). }
  procedure tcgx86_64.g_restore_registers(list: TAsmList);
    begin
      if use_push then
        exit;
      inherited g_restore_registers(list);
    end;
  { Generates a local unwind to label l.  On x86_64-win64 this is a call to
    the _FPC_local_unwind helper with the stack pointer as first and the
    address of l as second argument; every other target uses the inherited
    implementation. }
  procedure tcgx86_64.g_local_unwind(list: TAsmList; l: TAsmLabel);
    var
      frame_para,target_para: tcgpara;
      labelref: treference;
      helper: tprocdef;
    begin
      if (target_info.system=system_x86_64_win64) then
        begin
          helper:=search_system_proc('_fpc_local_unwind');
          frame_para.init;
          target_para.init;
          paramanager.getintparaloc(list,helper,1,frame_para);
          paramanager.getintparaloc(list,helper,2,target_para);
          reference_reset_symbol(labelref,l,0,1,[]);
          { TODO: using RSP is correct only while the stack is fixed!!
            (true now, but will change if/when allocating from stack is
            implemented) }
          a_load_reg_cgpara(list,OS_ADDR,NR_STACK_POINTER_REG,frame_para);
          a_loadaddr_ref_cgpara(list,labelref,target_para);
          paramanager.freecgpara(list,target_para);
          paramanager.freecgpara(list,frame_para);
          g_call(list,'_FPC_local_unwind');
          target_para.done;
          frame_para.done;
        end
      else
        inherited g_local_unwind(list,l);
    end;
  { Copies the raw bits of the integer register intreg into the MM register
    mmreg using MOVD (32 bit) or MOVQ (64 bit).  No conversion is performed:
    both sizes must be equally wide, tosize must be OS_F32, OS_F64 or
    OS_M64, and a shuffle, if given, must be scalar; anything else raises
    an internal error. }
  procedure tcgx86_64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      movop: tasmop;
    begin
      { raw data transfer only }
      if not ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (tosize in [OS_F32,OS_F64,OS_M64])) then
        internalerror(2009112505);

      case fromsize of
        OS_32,OS_S32:
          movop:=A_MOVD;
        OS_64,OS_S64:
          movop:=A_MOVQ;
        else
          internalerror(2009112506);
      end;

      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112517);

      list.concat(taicpu.op_reg_reg(movop,S_NO,intreg,mmreg));
    end;
  { Copies the raw bits of the MM register mmreg into the integer register
    intreg using MOVD (32 bit) or MOVQ (64 bit).  No conversion is
    performed: both sizes must be equally wide, fromsize must be OS_F32,
    OS_F64 or OS_M64, and a shuffle, if given, must be scalar; anything
    else raises an internal error. }
  procedure tcgx86_64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      movop: tasmop;
    begin
      { raw data transfer only }
      if not ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (fromsize in [OS_F32,OS_F64,OS_M64])) then
        internalerror(2009112507);

      case tosize of
        OS_32,OS_S32:
          movop:=A_MOVD;
        OS_64,OS_S64:
          movop:=A_MOVQ;
        else
          internalerror(2009112408);
      end;

      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112515);

      list.concat(taicpu.op_reg_reg(movop,S_NO,mmreg,intreg));
    end;
  { Tells whether the Microsoft x64 ABI applies: decided by the calling
    convention of the current procedure when there is one, otherwise by
    whether the target system is x86_64-win64. }
  function tcgx86_64.use_ms_abi: boolean;
    begin
      if not assigned(current_procinfo) then
        result:=(target_info.system=system_x86_64_win64)
      else
        result:=x86_64_use_ms_abi(current_procinfo.procdef.proccalloption);
    end;
  { Instantiates the global code generator objects: cg becomes the x86-64
    code generator and cg128 a tcg128 instance. }
  procedure create_codegen;
    begin
      cg:=tcgx86_64.create;
      cg128:=tcg128.create;
    end;
  463. end.