cgcpu.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. This unit implements the code generator for the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,cgutils,cgobj,cgx86,
  22. aasmbase,aasmtai,aasmdata,aasmcpu,
  23. cpubase,cpuinfo,cpupara,parabase,
  24. symdef,
  25. node,symconst,rgx86,procinfo;
  type
    { Code generator class for x86-64; specialises the shared x86 code
      generator (tcgx86) by overriding the routines declared below. }
    tcgx86_64 = class(tcgx86)
      { sets up the integer, MM and FPU register allocators }
      procedure init_register_allocators;override;

      { procedure prologue / epilogue }
      procedure g_proc_entry(list : TAsmList;localsize:longint; nostackframe:boolean);override;
      procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;

      { interface method wrapper emitted under the symbol labelname }
      procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;

      { local unwind to label l }
      procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;

      { raw data moves between integer and MM registers (no conversion) }
      procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
      procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
    end;
  36. procedure create_codegen;
  37. implementation
  38. uses
  39. globtype,globals,verbose,systems,cutils,cclasses,
  40. symsym,symtable,defutil,paramgr,fmodule,cpupi,
  41. rgobj,tgobj,rgcpu;
  { Fills the global saved_standard_registers / saved_mm_registers tables for
    the current target (only when the table length does not already match
    the expected one) and creates the integer, MM and FPU register
    allocators. }
  procedure Tcgx86_64.init_register_allocators;
    const
      { register tables copied into saved_standard_registers / saved_mm_registers }
      win64_saved_std_regs : array[0..6] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15);
      others_saved_std_regs : array[0..4] of tsuperregister = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
      { expected length of saved_standard_registers, indexed by "target is win64" }
      saved_regs_length : array[boolean] of longint = (5,7);
      win64_saved_xmm_regs : array[0..9] of tsuperregister = (RS_XMM6,RS_XMM7,
        RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
    var
      idx : longint;
      is_win64 : boolean;
    begin
      inherited init_register_allocators;

      is_win64:=(target_info.system=system_x86_64_win64);

      { (re)build the saved register tables only if they do not have the
        length expected for this target yet }
      if length(saved_standard_registers)<>saved_regs_length[is_win64] then
        if is_win64 then
          begin
            SetLength(saved_standard_registers,Length(win64_saved_std_regs));
            SetLength(saved_mm_registers,Length(win64_saved_xmm_regs));
            for idx:=low(win64_saved_std_regs) to high(win64_saved_std_regs) do
              saved_standard_registers[idx]:=win64_saved_std_regs[idx];
            for idx:=low(win64_saved_xmm_regs) to high(win64_saved_xmm_regs) do
              saved_mm_registers[idx]:=win64_saved_xmm_regs[idx];
          end
        else
          begin
            SetLength(saved_standard_registers,Length(others_saved_std_regs));
            SetLength(saved_mm_registers,0);
            for idx:=low(others_saved_std_regs) to high(others_saved_std_regs) do
              saved_standard_registers[idx]:=others_saved_std_regs[idx];
          end;

      { the register order passed to the allocator differs between win64
        and the other targets }
      if is_win64 then
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,RS_R11,
           RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],
          first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,RS_R9,RS_R10,RS_R11,
           RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],
          first_int_imreg,[]);

      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,
        [RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
         RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],
        first_mm_imreg,[]);

      rgfpu:=Trgx86fpu.create;
    end;
  { Emits the procedure prologue into list.

    Unless nostackframe is set, this allocates the frame pointer register,
    optionally pushes and loads it, and reserves localsize bytes of stack
    (rounded so that the stack honours target_info.stackalign).  CFI
    information is updated along the way.  If unwind information is needed
    (pi_has_unwind_info), the matching SEH directives are generated as well. }
  procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
    var
      prologue_anchor : tlinkedlistitem;
      regidx : integer;
      saveref : treference;
      sehlist : TAsmList;
      rsp_bias : longint;
      endprologue_given : boolean;
      misalign : longint;
      fppara : tparavarsym;
      is_win64 : boolean;
      vec_rest : aint;
    begin
      { remember the current tail of the list: .seh_proc goes right after it }
      prologue_anchor:=list.last;
      is_win64:=(target_info.system=system_x86_64_win64);

      { pi_has_unwind_info may already be set at this point if there are
        SEH directives in assembler body. In this case, .seh_endprologue
        is expected to be one of those directives, and not generated here. }
      endprologue_given:=(pi_has_unwind_info in current_procinfo.flags);

      if not nostackframe then
        begin
          { the return address is already on the stack }
          misalign:=sizeof(pint);
          list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));

          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { save old framepointer: push <frame_pointer> }
              inc(misalign,sizeof(pint));
              list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
              if is_win64 then
                begin
                  list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
                  include(current_procinfo.flags,pi_has_unwind_info);
                end;

              { Return address and FP are both on stack }
              current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
              current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));

              if current_procinfo.procdef.proctypeoption=potype_exceptfilter then
                begin
                  { load framepointer from hidden $parentfp parameter }
                  fppara:=tparavarsym(current_procinfo.procdef.paras[0]);
                  if not (vo_is_parentfp in fppara.varoptions) then
                    InternalError(201201142);
                  if (fppara.paraloc[calleeside].location^.loc<>LOC_REGISTER) or
                     (fppara.paraloc[calleeside].location^.next<>nil) then
                    InternalError(201201143);
                  list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],
                    fppara.paraloc[calleeside].location^.register,NR_FRAME_POINTER_REG));

                  { Need only as much stack space as necessary to do the calls.
                    Exception filters don't have own local vars, and temps are 'mapped'
                    to the parent procedure.
                    maxpushedparasize is already aligned at least on x86_64. }
                  localsize:=current_procinfo.maxpushedparasize;
                end
              else
                { regular case: mov <stack_pointer>,<frame_pointer> }
                list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG));

              current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
              {
                TODO: current framepointer handling is not compatible with Win64 at all:
                Win64 expects FP to point to the top or into the middle of local area.
                In FPC it points to the bottom, making it impossible to generate
                UWOP_SET_FPREG unwind code if local area is > 240 bytes.
                So for now pretend we never have a framepointer.
              }
            end
          else
            CGmessage(cg_d_stackframe_omited);

          { allocate stackframe space; even with localsize=0 the stack may
            have to be realigned when the procedure does calls or is an
            assembler routine }
          if (localsize<>0) or
             ((target_info.stackalign>sizeof(pint)) and
              (misalign<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              if target_info.stackalign>sizeof(pint) then
                localsize:=align(localsize+misalign,target_info.stackalign)-misalign;
              cg.g_stackpointer_alloc(list,localsize);
              if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
              current_procinfo.final_localsize:=localsize;
              if is_win64 then
                begin
                  if localsize<>0 then
                    list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
                  include(current_procinfo.flags,pi_has_unwind_info);
                end;
            end;
        end;

      { nothing more to do unless unwind data has to be generated }
      if not (pi_has_unwind_info in current_procinfo.flags) then
        exit;

      { Generate unwind data for x86_64-win64 }
      list.insertafter(cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname),prologue_anchor);
      sehlist:=TAsmList.Create;

      { We need to record positive offsets from RSP; if registers are saved
        at negative offsets from RBP we need to account for it. }
      if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
        rsp_bias:=current_procinfo.final_localsize
      else
        rsp_bias:=0;

      { There's no need to describe position of register saves precisely;
        since registers are not modified before they are saved, and saves do not
        change RSP, 'logically' all saves can happen at the end of prologue. }
      saveref:=current_procinfo.save_regs_ref;
      for regidx:=low(saved_standard_registers) to high(saved_standard_registers) do
        if saved_standard_registers[regidx] in rg[R_INTREGISTER].used_in_proc then
          begin
            sehlist.concat(cai_seh_directive.create_reg_offset(ash_savereg,
              newreg(R_INTREGISTER,saved_standard_registers[regidx],R_SUBWHOLE),
              saveref.offset+rsp_bias));
            inc(saveref.offset,sizeof(aint));
          end;

      if uses_registers(R_MMREGISTER) then
        begin
          { round the save offset up to a multiple of the vector size }
          vec_rest:=saveref.offset mod tcgsize2size[OS_VECTOR];
          if vec_rest<>0 then
            inc(saveref.offset,tcgsize2size[OS_VECTOR]-vec_rest);
          for regidx:=low(saved_mm_registers) to high(saved_mm_registers) do
            if saved_mm_registers[regidx] in rg[R_MMREGISTER].used_in_proc then
              begin
                sehlist.concat(cai_seh_directive.create_reg_offset(ash_savexmm,
                  newreg(R_MMREGISTER,saved_mm_registers[regidx],R_SUBMMWHOLE),
                  saveref.offset+rsp_bias));
                inc(saveref.offset,tcgsize2size[OS_VECTOR]);
              end;
        end;

      if not endprologue_given then
        sehlist.concat(cai_seh_directive.create(ash_endprologue));

      { place the directives after the recorded end-of-prologue marker if
        there is one, otherwise append them to the prologue list }
      if assigned(current_procinfo.endprologue_ai) then
        current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,sehlist)
      else
        list.concatlist(sehlist);
      sehlist.free;
    end;
  { Emits the procedure epilogue into list: releases the PIC register when
    generating PIC, removes the stack frame (unless nostackframe is set),
    emits the final RET and, when unwind info is present, the scope table
    and the closing SEH directive. }
  procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);

    { Emits "lea a(<stack_pointer>),<stack_pointer>". }
    procedure increase_sp(a : tcgint);
      var
        spref : treference;
      begin
        reference_reset_base(spref,NR_STACK_POINTER_REG,a,0);
        { normally, lea is a better choice than an add }
        list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],spref,NR_STACK_POINTER_REG));
      end;

    var
      fpref : treference;
      is_filter : boolean;
    begin
      { Release PIC register }
      if cs_create_pic in current_settings.moduleswitches then
        list.concat(tai_regalloc.dealloc(NR_PIC_OFFSET_REG,nil));

      { Prevent return address from a possible call from ending up in the epilogue }
      { (restoring registers happens before epilogue, providing necessary padding) }
      if (pi_has_unwind_info in current_procinfo.flags) and
         (pi_do_call in current_procinfo.flags) and
         not (pi_has_saved_regs in current_procinfo.flags) then
        list.concat(Taicpu.op_none(A_NOP));

      { remove stackframe }
      if not nostackframe then
        begin
          is_filter:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);
          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or is_filter then
            begin
              { no frame pointer to restore RSP from: undo the allocation directly }
              if current_procinfo.final_localsize<>0 then
                increase_sp(current_procinfo.final_localsize);
              { exception filters pushed the frame pointer in their prologue }
              if is_filter then
                list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
            end
          else if target_info.system=system_x86_64_win64 then
            begin
              { Comply with Win64 unwinding mechanism, which only recognizes
                'add $constant,%rsp' and 'lea offset(FPREG),%rsp' as belonging to
                the function epilog.
                Neither 'leave' nor even 'mov %FPREG,%rsp' are allowed. }
              reference_reset_base(fpref,current_procinfo.framepointer,0,sizeof(pint));
              list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],fpref,NR_STACK_POINTER_REG));
              list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],current_procinfo.framepointer));
            end
          else
            list.concat(Taicpu.op_none(A_LEAVE,S_NO));

          { NOTE(review): g_proc_entry allocates current_procinfo.framepointer,
            while NR_FRAME_POINTER_REG is released here - confirm this
            asymmetry is intended. }
          list.concat(tai_regalloc.dealloc(NR_FRAME_POINTER_REG,nil));
        end;

      list.concat(Taicpu.Op_none(A_RET,S_NO));

      if pi_has_unwind_info in current_procinfo.flags then
        begin
          tx86_64procinfo(current_procinfo).dump_scopes(list);
          list.concat(cai_seh_directive.create(ash_endproc));
        end;
    end;
  { Emits an interface wrapper under the symbol labelname: adjusts the self
    parameter by ioffset (via g_adjust_self_value) and then jumps to the
    real method, either through the VMT (virtual methods) or directly to
    the method's mangled name. }
  procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
    var
      vmtref,
      targetref : treference;
      targetsym : tasmsymbol;
      selfreg : tregister;
    begin
      { sanity checks on the kind of routine being wrapped }
      if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
        Internalerror(200006137);
      if not assigned(procdef.struct) or
         (procdef.procoptions*[po_classmethod, po_staticmethod,
           po_methodpointer, po_interrupt, po_iocheck]<>[]) then
        Internalerror(200006138);
      if procdef.owner.symtabletype<>ObjectSymtable then
        Internalerror(200109191);

      { the wrapper symbol is global when not compiling a unit, when
        smartlinking, or when the owning type lives in a global symtable }
      if (not current_module.is_unit) or create_smartlink or
         (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
        List.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
      else
        List.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));

      { set param1 interface to self }
      g_adjust_self_value(list,procdef,ioffset);

      if (po_virtualmethod in procdef.procoptions) and
         not is_objectpascal_helper(procdef.struct) then
        begin
          if procdef.extnumber=$ffff then
            Internalerror(200006139);

          { load vmt from first parameter; win64 uses a different abi, so
            the first parameter lives in another register there }
          if target_info.system=system_x86_64_win64 then
            selfreg:=NR_RCX
          else
            selfreg:=NR_RDI;
          reference_reset_base(vmtref,selfreg,0,sizeof(pint));
          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,vmtref,NR_RAX);

          { jmp *vmtoffs(%rax) ; method offs }
          reference_reset_base(vmtref,NR_RAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
          list.concat(taicpu.op_ref(A_JMP,S_Q,vmtref));
        end
      else
        begin
          { jump straight to the method's mangled name }
          targetsym:=current_asmdata.RefAsmSymbol(procdef.mangledname);
          reference_reset_symbol(targetref,targetsym,0,sizeof(pint));
          if (cs_create_pic in current_settings.moduleswitches) and
             { darwin/x86_64's assembler doesn't want @PLT after call symbols }
             (target_info.system<>system_x86_64_darwin) then
            targetref.refaddr:=addr_pic
          else
            targetref.refaddr:=addr_full;
          list.concat(taicpu.op_ref(A_JMP,S_NO,targetref));
        end;

      List.concat(Tai_symbol_end.Createname(labelname));
    end;
  { Generates a local unwind to label l.

    On targets other than x86_64-win64 the inherited implementation is used.
    On win64 a call to the system routine _FPC_local_unwind is emitted with
    the stack pointer as first and the address of l as second parameter.
    All generated code, including the call itself, goes into list. }
  procedure tcgx86_64.g_local_unwind(list: TAsmList; l: TAsmLabel);
    var
      para1,para2: tcgpara;
      href: treference;
      pd: tprocdef;
    begin
      if (target_info.system<>system_x86_64_win64) then
        begin
          inherited g_local_unwind(list,l);
          exit;
        end;

      { obtain the locations of the two parameters of the helper }
      pd:=search_system_proc('_fpc_local_unwind');
      para1.init;
      para2.init;
      paramanager.getintparaloc(pd,1,para1);
      paramanager.getintparaloc(pd,2,para2);

      reference_reset_symbol(href,l,0,1);
      { TODO: using RSP is correct only while the stack is fixed!!
        (true now, but will change if/when allocating from stack is implemented) }
      a_load_reg_cgpara(list,OS_ADDR,NR_STACK_POINTER_REG,para1);
      a_loadaddr_ref_cgpara(list,href,para2);
      paramanager.freecgpara(list,para2);
      paramanager.freecgpara(list,para1);

      { emit the call into the same list as the parameter loads, so the
        call cannot get separated from its argument setup when list is not
        the current assembler list }
      g_call(list,'_FPC_local_unwind');

      para2.done;
      para1.done;
    end;
  { Moves the contents of integer register intreg into MM register mmreg
    using MOVD (32 bit) or MOVQ (64 bit).  Raises an internal error when
    the sizes differ, when tosize is not one of OS_F32/OS_F64/OS_M64, when
    fromsize is not a 32/64 bit integer size, or when a non-scalar shuffle
    is passed. }
  procedure tcgx86_64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      movop : tasmop;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (tosize in [OS_F32,OS_F64,OS_M64])) then
        internalerror(2009112505);

      { pick the move instruction matching the width of the integer source }
      if fromsize in [OS_32,OS_S32] then
        movop:=A_MOVD
      else if fromsize in [OS_64,OS_S64] then
        movop:=A_MOVQ
      else
        internalerror(2009112506);

      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112517);

      list.concat(taicpu.op_reg_reg(movop,S_NO,intreg,mmreg));
    end;
  { Moves the contents of MM register mmreg into integer register intreg
    using MOVD (32 bit) or MOVQ (64 bit).  Raises an internal error when
    the sizes differ, when fromsize is not one of OS_F32/OS_F64/OS_M64,
    when tosize is not a 32/64 bit integer size, or when a non-scalar
    shuffle is passed. }
  procedure tcgx86_64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      movop : tasmop;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (fromsize in [OS_F32,OS_F64,OS_M64])) then
        internalerror(2009112507);

      { pick the move instruction matching the width of the integer destination }
      if tosize in [OS_32,OS_S32] then
        movop:=A_MOVD
      else if tosize in [OS_64,OS_S64] then
        movop:=A_MOVQ
      else
        internalerror(2009112408);

      if assigned(shuffle) and
         not shufflescalar(shuffle) then
        internalerror(2009112515);

      list.concat(taicpu.op_reg_reg(movop,S_NO,mmreg,intreg));
    end;
  { Instantiates the global code generator objects: cg receives the x86-64
    code generator declared in this unit, cg128 an instance of tcg128. }
  procedure create_codegen;
    begin
      cg:=tcgx86_64.Create;
      cg128:=tcg128.Create;
    end;
  396. end.