cgcpu.pas 22 KB


  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. This unit implements the code generator for the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,cgutils,cgobj,cgx86,
  22. aasmbase,aasmtai,aasmdata,aasmcpu,
  23. cpubase,cpuinfo,cpupara,parabase,
  24. symdef,
  25. node,symconst,rgx86,procinfo;
  26. type
  { Code generator for x86-64. Derives from the shared x86 generator
    (tcgx86) and overrides the routines listed below. }
  tcgx86_64 = class(tcgx86)
    { fills the callee-saved register lists and creates the register allocators }
    procedure init_register_allocators;override;

    { procedure prologue and epilogue }
    procedure g_proc_entry(list : TAsmList;localsize:longint; nostackframe:boolean);override;
    procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;

    { interface wrapper: adjusts self, then jumps to the real method }
    procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;

    { on win64 calls the local unwind helper, otherwise defers to the ancestor }
    procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;

    { both defer to the ancestor only when use_push is false }
    procedure g_save_registers(list: TAsmList);override;
    procedure g_restore_registers(list: TAsmList);override;

    { raw (non-converting) moves between integer and MM registers }
    procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
    procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
  private
    { true if the stack pointer is the frame pointer or the current
      procedure is an exception filter }
    function use_push: boolean;
    { number of bytes needed to save the used callee-saved XMM registers }
    function saved_xmm_reg_size: longint;
  end;
  41. procedure create_codegen;
  42. implementation
  43. uses
  44. globtype,globals,verbose,systems,cutils,cclasses,
  45. symsym,symtable,defutil,paramgr,fmodule,cpupi,
  46. rgobj,tgobj,rgcpu,ncgutil;
  { Sets up the register allocators for the current x86-64 target.

    After the inherited setup has run, saved_standard_registers and
    saved_mm_registers are (re)filled with the callee-saved registers of the
    target whenever the length of saved_standard_registers does not match the
    target's table. The expected length is taken from the tables themselves,
    so the check cannot get out of sync when a table is edited.
    Finally the integer, MM and FPU register allocators are created; the
    integer allocation order differs between win64 and the other targets. }
  procedure Tcgx86_64.init_register_allocators;
    const
      win64_saved_std_regs : array[0..6] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15);
      others_saved_std_regs : array[0..4] of tsuperregister = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
      win64_saved_xmm_regs : array[0..9] of tsuperregister = (RS_XMM6,RS_XMM7,
        RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
    var
      i : longint;
      is_win64 : boolean;
      expected_std_regs : longint;
    begin
      inherited init_register_allocators;

      is_win64:=(target_info.system=system_x86_64_win64);

      { derive the expected count from the table that applies to this target }
      if is_win64 then
        expected_std_regs:=Length(win64_saved_std_regs)
      else
        expected_std_regs:=Length(others_saved_std_regs);

      if length(saved_standard_registers)<>expected_std_regs then
        begin
          if is_win64 then
            begin
              SetLength(saved_standard_registers,Length(win64_saved_std_regs));
              SetLength(saved_mm_registers,Length(win64_saved_xmm_regs));
              for i:=low(win64_saved_std_regs) to high(win64_saved_std_regs) do
                saved_standard_registers[i]:=win64_saved_std_regs[i];
              for i:=low(win64_saved_xmm_regs) to high(win64_saved_xmm_regs) do
                saved_mm_registers[i]:=win64_saved_xmm_regs[i];
            end
          else
            begin
              SetLength(saved_standard_registers,Length(others_saved_std_regs));
              { no MM registers are recorded as callee-saved here }
              SetLength(saved_mm_registers,0);
              for i:=low(others_saved_std_regs) to high(others_saved_std_regs) do
                saved_standard_registers[i]:=others_saved_std_regs[i];
            end;
        end;

      { integer registers: same set on all targets, different order }
      if is_win64 then
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
          RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
          RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);

      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
        RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
      rgfpu:=Trgx86fpu.create;
    end;
  { Returns true when the current procedure uses the stack pointer as its
    frame pointer, or when it is an exception filter. }
  function tcgx86_64.use_push: boolean;
    begin
      if current_procinfo.framepointer=NR_STACK_POINTER_REG then
        result:=true
      else
        result:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);
    end;
  { Returns the number of bytes needed to store every register from
    saved_mm_registers that is used in the current procedure, at
    tcgsize2size[OS_VECTOR] bytes per register.
    The result is 0 on targets other than win64 and when the procedure
    uses no MM registers at all. }
  function tcgx86_64.saved_xmm_reg_size: longint;
    var
      idx: longint;
    begin
      result:=0;
      if (target_info.system=system_x86_64_win64) and
         uses_registers(R_MMREGISTER) then
        begin
          for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
            if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
              inc(result,tcgsize2size[OS_VECTOR]);
        end;
    end;
  { Emits the prologue of the current procedure into list.

    list         - assembler list receiving the prologue
    localsize    - requested size of the local area; adjusted below for
                   exception filters, saved XMM registers and stack alignment
    nostackframe - when true, no frame setup or stack allocation is emitted

    If the procedure carries unwind information (pi_has_unwind_info), the
    SEH directives describing the prologue are generated as well. }
  procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
    var
      first_item: tlinkedlistitem;
      i: integer;
      saveref: treference;
      sehlist: TAsmList;
      frame_offset: longint;
      suppress_endprologue: boolean;
      stackmisalignment: longint;
      xmmsize: longint;

    { pushes reg; on win64 also emits the matching SEH pushreg directive
      and marks the procedure as having unwind info }
    procedure push_one_reg(reg: tregister);
      begin
        list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
        if (target_info.system=system_x86_64_win64) then
          begin
            list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
            include(current_procinfo.flags,pi_has_unwind_info);
          end;
      end;

    { pushes every callee-saved integer register used in this procedure and
      accounts for each push in stackmisalignment }
    procedure push_regs;
      var
        idx: longint;
      begin
        for idx:=low(saved_standard_registers) to high(saved_standard_registers) do
          if saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc then
            begin
              inc(stackmisalignment,sizeof(pint));
              push_one_reg(newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE));
            end;
      end;

    begin
      { remember the current tail of the list: the SEH proc directive is
        inserted right after it later on }
      first_item:=list.last;
      { pi_has_unwind_info may already be set at this point if there are
        SEH directives in assembler body. In this case, .seh_endprologue
        is expected to be one of those directives, and not generated here. }
      suppress_endprologue:=(pi_has_unwind_info in current_procinfo.flags);

      if not nostackframe then
        begin
          { the return address is already on the stack }
          stackmisalignment:=sizeof(pint);
          list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
          if current_procinfo.framepointer=NR_STACK_POINTER_REG then
            begin
              { no frame pointer: only push the callee-saved registers }
              push_regs;
              CGmessage(cg_d_stackframe_omited);
            end
          else
            begin
              { save the old frame pointer: push <frame_pointer> }
              inc(stackmisalignment,sizeof(pint));
              push_one_reg(NR_FRAME_POINTER_REG);
              { Return address and FP are both on stack }
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
              current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
              if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
                list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG))
              else
                begin
                  push_regs;
                  gen_load_frame_for_exceptfilter(list);
                  { Need only as much stack space as necessary to do the calls.
                    Exception filters don't have own local vars, and temps are 'mapped'
                    to the parent procedure.
                    maxpushedparasize is already aligned at least on x86_64. }
                  localsize:=current_procinfo.maxpushedparasize;
                end;
              current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
              {
                TODO: current framepointer handling is not compatible with Win64 at all:
                Win64 expects FP to point to the top or into the middle of local area.
                In FPC it points to the bottom, making it impossible to generate
                UWOP_SET_FPREG unwind code if local area is > 240 bytes.
                So for now pretend we never have a framepointer.
              }
            end;

          { when pushing, the XMM save area is placed above the aligned locals
            and addressed relative to the stack pointer }
          xmmsize:=saved_xmm_reg_size;
          if use_push and (xmmsize<>0) then
            begin
              localsize:=align(localsize,target_info.stackalign)+xmmsize;
              reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
                localsize-xmmsize,tcgsize2size[OS_VECTOR]);
            end;

          { allocate stackframe space }
          if (localsize<>0) or
             ((target_info.stackalign>sizeof(pint)) and
              (stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              { round up so that the stack ends up aligned after the allocation }
              if target_info.stackalign>sizeof(pint) then
                localsize:=align(localsize+stackmisalignment,target_info.stackalign)-stackmisalignment;
              cg.g_stackpointer_alloc(list,localsize);
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
              current_procinfo.final_localsize:=localsize;

              if (target_info.system=system_x86_64_win64) then
                begin
                  if localsize<>0 then
                    list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
                  include(current_procinfo.flags,pi_has_unwind_info);
                  if use_push and (xmmsize<>0) then
                    begin
                      { store the used callee-saved XMM registers into the save area }
                      saveref:=current_procinfo.save_regs_ref;
                      for i:=low(saved_mm_registers) to high(saved_mm_registers) do
                        if saved_mm_registers[i] in rg[R_MMREGISTER].used_in_proc then
                          begin
                            a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,newreg(R_MMREGISTER,saved_mm_registers[i],R_SUBMMWHOLE),saveref,nil);
                            inc(saveref.offset,tcgsize2size[OS_VECTOR]);
                          end;
                    end;
                end;
            end;
        end;

      { nothing more to do without unwind information }
      if not (pi_has_unwind_info in current_procinfo.flags) then
        exit;

      { Generate unwind data for x86_64-win64 }
      list.insertafter(cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname),first_item);
      sehlist:=TAsmList.Create;

      { There's no need to describe position of register saves precisely;
        since registers are not modified before they are saved, and saves do not
        change RSP, 'logically' all saves can happen at the end of prologue. }
      saveref:=current_procinfo.save_regs_ref;

      { We need to record positive offsets from RSP; if registers are saved
        at negative offsets from RBP we need to account for it. }
      if use_push then
        frame_offset:=0
      else
        begin
          frame_offset:=current_procinfo.final_localsize;
          { pushed registers were already described by pushreg directives;
            only the non-push case needs savereg entries }
          for i:=low(saved_standard_registers) to high(saved_standard_registers) do
            if saved_standard_registers[i] in rg[R_INTREGISTER].used_in_proc then
              begin
                sehlist.concat(cai_seh_directive.create_reg_offset(ash_savereg,
                  newreg(R_INTREGISTER,saved_standard_registers[i],R_SUBWHOLE),
                  saveref.offset+frame_offset));
                inc(saveref.offset,sizeof(aint));
              end;
        end;

      if uses_registers(R_MMREGISTER) then
        begin
          { round the offset up to the next multiple of the vector size }
          if (saveref.offset mod tcgsize2size[OS_VECTOR])<>0 then
            inc(saveref.offset,tcgsize2size[OS_VECTOR]-(saveref.offset mod tcgsize2size[OS_VECTOR]));
          for i:=low(saved_mm_registers) to high(saved_mm_registers) do
            begin
              if saved_mm_registers[i] in rg[R_MMREGISTER].used_in_proc then
                begin
                  sehlist.concat(cai_seh_directive.create_reg_offset(ash_savexmm,
                    newreg(R_MMREGISTER,saved_mm_registers[i],R_SUBMMWHOLE),
                    saveref.offset+frame_offset));
                  inc(saveref.offset,tcgsize2size[OS_VECTOR]);
                end;
            end;
        end;

      if not suppress_endprologue then
        sehlist.concat(cai_seh_directive.create(ash_endprologue));

      { place the directives after the recorded end-of-prologue marker if
        there is one, otherwise append them to list }
      if assigned(current_procinfo.endprologue_ai) then
        current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,sehlist)
      else
        list.concatlist(sehlist);
      sehlist.free;
    end;
  { Emits the epilogue of the current procedure into list.

    list         - assembler list receiving the epilogue
    parasize     - not used by this implementation
    nostackframe - when true, no stack frame removal is emitted

    The frame is removed in one of three ways: pop-based (use_push),
    lea/pop on win64, or leave on the remaining targets. A ret follows, and
    for procedures with unwind info the scopes are dumped and the SEH
    endproc directive is emitted. }
  procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);

    { adds a to the stack pointer using lea }
    procedure increase_sp(a : tcgint);
      var
        spref : treference;
      begin
        reference_reset_base(spref,NR_STACK_POINTER_REG,a,0);
        { normally, lea is a better choice than an add }
        list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],spref,NR_STACK_POINTER_REG));
      end;

    var
      saveref : treference;
      xmmreg : tregister;
      i : longint;
    begin
      { Release PIC register }
      if cs_create_pic in current_settings.moduleswitches then
        list.concat(tai_regalloc.dealloc(NR_PIC_OFFSET_REG,nil));

      { Prevent return address from a possible call from ending up in the epilogue }
      { (restoring registers happens before epilogue, providing necessary padding) }
      if (current_procinfo.flags*[pi_has_unwind_info,pi_do_call,pi_has_saved_regs])=[pi_has_unwind_info,pi_do_call] then
        list.concat(Taicpu.op_none(A_NOP));

      { remove stackframe }
      if not nostackframe then
        begin
          if use_push then
            begin
              if (saved_xmm_reg_size<>0) then
                begin
                  { reload the callee-saved XMM registers from the save area }
                  saveref:=current_procinfo.save_regs_ref;
                  for i:=low(saved_mm_registers) to high(saved_mm_registers) do
                    if saved_mm_registers[i] in rg[R_MMREGISTER].used_in_proc then
                      begin
                        { Allocate register so the optimizer does not remove the load }
                        xmmreg:=newreg(R_MMREGISTER,saved_mm_registers[i],R_SUBMMWHOLE);
                        a_reg_alloc(list,xmmreg);
                        a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,saveref,xmmreg,nil);
                        inc(saveref.offset,tcgsize2size[OS_VECTOR]);
                      end;
                end;
              { release the local area, then restore the pushed registers }
              if (current_procinfo.final_localsize<>0) then
                increase_sp(current_procinfo.final_localsize);
              internal_restore_regs(list,true);
              { exception filters additionally pushed the frame pointer }
              if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
                list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
            end
          else if (target_info.system=system_x86_64_win64) then
            begin
              { Comply with Win64 unwinding mechanism, which only recognizes
                'add $constant,%rsp' and 'lea offset(FPREG),%rsp' as belonging to
                the function epilog.
                Neither 'leave' nor even 'mov %FPREG,%rsp' are allowed. }
              reference_reset_base(saveref,current_procinfo.framepointer,0,sizeof(pint));
              list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],saveref,NR_STACK_POINTER_REG));
              list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],current_procinfo.framepointer));
            end
          else
            list.concat(Taicpu.op_none(A_LEAVE,S_NO));
          list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
        end;

      list.concat(Taicpu.Op_none(A_RET,S_NO));

      { close the unwind description of this procedure }
      if (pi_has_unwind_info in current_procinfo.flags) then
        begin
          tx86_64procinfo(current_procinfo).dump_scopes(list);
          list.concat(cai_seh_directive.create(ash_endproc));
        end;
    end;
  { Saves the callee-saved registers through the inherited implementation,
    unless use_push is true; g_proc_entry pushes them itself in that case. }
  procedure tcgx86_64.g_save_registers(list: TAsmList);
    begin
      if use_push then
        exit;
      inherited g_save_registers(list);
    end;
  { Restores the callee-saved registers through the inherited implementation,
    unless use_push is true; g_proc_exit restores them itself in that case. }
  procedure tcgx86_64.g_restore_registers(list: TAsmList);
    begin
      if use_push then
        exit;
      inherited g_restore_registers(list);
    end;
  { Emits the interface wrapper labelname for procdef into list.

    The wrapper adjusts the self parameter by ioffset and then jumps to the
    real method: through the VMT for virtual methods (except those of
    Object Pascal helpers), directly to the mangled name otherwise.

    Raises an internal error if procdef is not a plain function/procedure,
    has no owning struct, carries one of the rejected procoptions, or is not
    declared in an object symtable. }
  procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
    var
      make_global : boolean;
      href : treference;
      sym : tasmsymbol;
      r : treference;
      selfreg : tregister;
    begin
      if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
        Internalerror(200006137);
      if not assigned(procdef.struct) or
         (procdef.procoptions*[po_classmethod, po_staticmethod,
           po_methodpointer, po_interrupt, po_iocheck]<>[]) then
        Internalerror(200006138);
      if procdef.owner.symtabletype<>ObjectSymtable then
        Internalerror(200109191);

      { the label is global unless we compile a non-smartlinked unit and the
        owning definition does not live in the global symtable }
      make_global:=(not current_module.is_unit) or create_smartlink or
        (procdef.owner.defowner.owner.symtabletype=globalsymtable);

      if make_global then
        List.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
      else
        List.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));

      { set param1 interface to self }
      g_adjust_self_value(list,procdef,ioffset);

      if (po_virtualmethod in procdef.procoptions) and
         not is_objectpascal_helper(procdef.struct) then
        begin
          if (procdef.extnumber=$ffff) then
            Internalerror(200006139);
          { load vmt from first parameter }
          { win64 uses a different abi }
          if target_info.system=system_x86_64_win64 then
            selfreg:=NR_RCX
          else
            selfreg:=NR_RDI;
          reference_reset_base(href,selfreg,0,sizeof(pint));
          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_RAX);
          { jmp *vmtoffs(%rax) ; method offs }
          reference_reset_base(href,NR_RAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
          list.concat(taicpu.op_ref(A_JMP,S_Q,href));
        end
      else
        begin
          { non-virtual: jump straight to the method's symbol }
          sym:=current_asmdata.RefAsmSymbol(procdef.mangledname);
          reference_reset_symbol(r,sym,0,sizeof(pint));
          if (cs_create_pic in current_settings.moduleswitches) and
             { darwin/x86_64's assembler doesn't want @PLT after call symbols }
             (target_info.system<>system_x86_64_darwin) then
            r.refaddr:=addr_pic
          else
            r.refaddr:=addr_full;
          list.concat(taicpu.op_ref(A_JMP,S_NO,r));
        end;

      List.concat(Tai_symbol_end.Createname(labelname));
    end;
  { Emits a local unwind to label l.

    On win64 this is a call to _FPC_local_unwind with the stack pointer as
    first and the address of l as second parameter; every other target is
    handled by the inherited implementation. }
  procedure tcgx86_64.g_local_unwind(list: TAsmList; l: TAsmLabel);
    var
      frame_para,target_para: tcgpara;
      lblref: treference;
      pd: tprocdef;
    begin
      if (target_info.system=system_x86_64_win64) then
        begin
          pd:=search_system_proc('_fpc_local_unwind');
          frame_para.init;
          target_para.init;
          paramanager.getintparaloc(pd,1,frame_para);
          paramanager.getintparaloc(pd,2,target_para);
          reference_reset_symbol(lblref,l,0,1);
          { TODO: using RSP is correct only while the stack is fixed!!
            (true now, but will change if/when allocating from stack is implemented) }
          a_load_reg_cgpara(list,OS_ADDR,NR_STACK_POINTER_REG,frame_para);
          a_loadaddr_ref_cgpara(list,lblref,target_para);
          { release the parameter locations in reverse order before the call }
          paramanager.freecgpara(list,target_para);
          paramanager.freecgpara(list,frame_para);
          g_call(list,'_FPC_local_unwind');
          target_para.done;
          frame_para.done;
        end
      else
        inherited g_local_unwind(list,l);
    end;
  { Moves the raw contents of integer register intreg into MM register mmreg.

    fromsize and tosize must have the same byte size and tosize must be one
    of OS_F32, OS_F64 or OS_M64. A 32 bit source is moved with movd, a
    64 bit source with movq. A shuffle, if given, must be scalar.
    Any violation raises an internal error. }
  procedure tcgx86_64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      movop: tasmop;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (tcgsize2size[fromsize]<>tcgsize2size[tosize]) or
         not(tosize in [OS_F32,OS_F64,OS_M64]) then
        internalerror(2009112505);

      { the instruction is chosen by the width of the integer source }
      if fromsize in [OS_32,OS_S32] then
        movop:=A_MOVD
      else if fromsize in [OS_64,OS_S64] then
        movop:=A_MOVQ
      else
        internalerror(2009112506);

      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112517);

      list.concat(taicpu.op_reg_reg(movop,S_NO,intreg,mmreg));
    end;
  { Moves the raw contents of MM register mmreg into integer register intreg.

    fromsize and tosize must have the same byte size and fromsize must be
    one of OS_F32, OS_F64 or OS_M64. A 32 bit destination is written with
    movd, a 64 bit destination with movq. A shuffle, if given, must be
    scalar. Any violation raises an internal error. }
  procedure tcgx86_64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      movop: tasmop;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if (tcgsize2size[fromsize]<>tcgsize2size[tosize]) or
         not (fromsize in [OS_F32,OS_F64,OS_M64]) then
        internalerror(2009112507);

      { the instruction is chosen by the width of the integer destination }
      if tosize in [OS_32,OS_S32] then
        movop:=A_MOVD
      else if tosize in [OS_64,OS_S64] then
        movop:=A_MOVQ
      else
        internalerror(2009112408);

      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112515);

      list.concat(taicpu.op_reg_reg(movop,S_NO,mmreg,intreg));
    end;
  { Instantiates the code generator objects for this target: cg becomes a
    tcgx86_64 and cg128 a tcg128. }
  procedure create_codegen;
    begin
      cg:=tcgx86_64.create;
      cg128:=tcg128.create;
    end;
  476. end.