cgcpu.pas 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. This unit implements the code generator for the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,cgutils,cgobj,cgx86,
  22. aasmbase,aasmtai,aasmdata,aasmcpu,
  23. cpubase,cpuinfo,cpupara,parabase,
  24. symdef,
  25. node,symconst,rgx86,procinfo;
  type
    { Code generator class for x86-64. It derives from the shared x86 code
      generator (tcgx86) and overrides the routines listed below. }
    tcgx86_64 = class(tcgx86)
      { Fills the saved-register tables and creates the register allocators. }
      procedure init_register_allocators; override;

      { Procedure prologue; additionally emits SEH unwind directives when the
        current procedure carries unwind information. }
      procedure g_proc_entry(list : TAsmList;
                             parasize : longint;
                             nostackframe : boolean); override;

      { Procedure epilogue: removes the stack frame and emits the return. }
      procedure g_proc_exit(list : TAsmList;
                            parasize : longint;
                            nostackframe : boolean); override;

      { Emits the wrapper that adjusts self and jumps to the real method. }
      procedure g_intf_wrapper(list : TAsmList;
                               procdef : tprocdef;
                               const labelname : string;
                               ioffset : longint); override;

      { Emits a call to the local unwind helper (win64 only; other targets
        use the inherited implementation). }
      procedure g_local_unwind(list : TAsmList;
                               l : TAsmLabel); override;

      { Raw bit transfer from an integer register to an MM register. }
      procedure a_loadmm_intreg_reg(list : TAsmList;
                                    fromsize, tosize : tcgsize;
                                    intreg, mmreg : tregister;
                                    shuffle : pmmshuffle); override;

      { Raw bit transfer from an MM register to an integer register. }
      procedure a_loadmm_reg_intreg(list : TAsmList;
                                    fromsize, tosize : tcgsize;
                                    mmreg, intreg : tregister;
                                    shuffle : pmmshuffle); override;
    end;

  { Creates the global code generator objects (cg and cg128). }
  procedure create_codegen;
  37. implementation
  38. uses
  39. globtype,globals,verbose,systems,cutils,cclasses,
  40. symsym,symtable,defutil,paramgr,fmodule,cpupi,
  41. rgobj,tgobj,rgcpu;
  { Initialises the saved-register tables for the current target and creates
    the integer, MM and FPU register allocators.

    The saved-register tables are only (re)filled when the current length of
    saved_standard_registers does not match the length expected for the
    target (7 entries on win64, 5 elsewhere). }
  procedure Tcgx86_64.init_register_allocators;
    const
      win64_saved_std_regs : array[0..6] of tsuperregister =
        (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15);
      others_saved_std_regs : array[0..4] of tsuperregister =
        (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
      { indexed by "target is win64" }
      saved_regs_length : array[boolean] of longint = (5,7);
      win64_saved_xmm_regs : array[0..9] of tsuperregister =
        (RS_XMM6,RS_XMM7,RS_XMM8,RS_XMM9,RS_XMM10,
         RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
    var
      idx : longint;
      fpreg : tsuperregister;
      is_win64 : boolean;
    begin
      inherited init_register_allocators;

      is_win64:=(target_info.system=system_x86_64_win64);

      if length(saved_standard_registers)<>saved_regs_length[is_win64] then
        begin
          if is_win64 then
            begin
              SetLength(saved_standard_registers,Length(win64_saved_std_regs));
              SetLength(saved_mm_registers,Length(win64_saved_xmm_regs));
              for idx:=low(win64_saved_std_regs) to high(win64_saved_std_regs) do
                saved_standard_registers[idx]:=win64_saved_std_regs[idx];
              for idx:=low(win64_saved_xmm_regs) to high(win64_saved_xmm_regs) do
                saved_mm_registers[idx]:=win64_saved_xmm_regs[idx];
            end
          else
            begin
              SetLength(saved_standard_registers,Length(others_saved_std_regs));
              SetLength(saved_mm_registers,0);
              for idx:=low(others_saved_std_regs) to high(others_saved_std_regs) do
                saved_standard_registers[idx]:=others_saved_std_regs[idx];
            end;
        end;

      { without a current procinfo we are generating interface wrapper code;
        fall back to the default frame pointer register in that case }
      if assigned(current_procinfo) then
        fpreg:=getsupreg(current_procinfo.framepointer)
      else
        fpreg:=RS_FRAME_POINTER_REG;

      { the two targets list the same integer registers, only in a
        different order }
      if is_win64 then
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,RS_R11,
           RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],
          first_int_imreg,[fpreg])
      else
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,RS_R9,RS_R10,RS_R11,
           RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],
          first_int_imreg,[fpreg]);

      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,
        [RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
         RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],
        first_mm_imreg,[]);

      rgfpu:=Trgx86fpu.create;
    end;
  { Emits the procedure prologue via the inherited implementation. If the
    procedure has unwind information afterwards, the SEH directives that
    describe the prologue are added as well: the procedure-start directive,
    one save record per used callee-saved register and, unless it is already
    expected to be present, the end-of-prologue directive. }
  procedure tcgx86_64.g_proc_entry(list : TAsmList;parasize:longint;nostackframe:boolean);
    var
      anchor : tlinkedlistitem;
      idx : integer;
      saveref : treference;
      sehlist : TAsmList;
      rsp_bias : longint;
      vecsize : longint;
      had_unwind_info : boolean;
    begin
      { remember the tail of the list as it is before the prologue is added;
        the procedure-start directive is inserted right after it }
      anchor:=list.last;

      { pi_has_unwind_info may already be set at this point if there are
        SEH directives in the assembler body. In this case, .seh_endprologue
        is expected to be one of those directives, and is not generated here. }
      had_unwind_info:=(pi_has_unwind_info in current_procinfo.flags);

      inherited g_proc_entry(list,parasize,nostackframe);

      { the flag is tested again after the inherited call; nothing more to
        do when the procedure has no unwind information }
      if not (pi_has_unwind_info in current_procinfo.flags) then
        exit;

      { Generate unwind data for x86_64-win64 }
      list.insertafter(
        cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname),
        anchor);

      sehlist:=TAsmList.Create;

      { We need to record positive offsets from RSP; if registers are saved
        at negative offsets from RBP we need to account for it. }
      if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
        rsp_bias:=current_procinfo.final_localsize
      else
        rsp_bias:=0;

      { There's no need to describe the position of register saves precisely;
        since registers are not modified before they are saved, and saves do
        not change RSP, 'logically' all saves can happen at the end of the
        prologue. }
      saveref:=current_procinfo.save_regs_ref;
      for idx:=low(saved_standard_registers) to high(saved_standard_registers) do
        begin
          if saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc then
            begin
              sehlist.concat(cai_seh_directive.create_reg_offset(ash_savereg,
                newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE),
                saveref.offset+rsp_bias));
              inc(saveref.offset,sizeof(aint));
            end;
        end;

      if uses_registers(R_MMREGISTER) then
        begin
          vecsize:=tcgsize2size[OS_VECTOR];
          { advance the offset to the next multiple of the vector size }
          if (saveref.offset mod vecsize)<>0 then
            inc(saveref.offset,vecsize-(saveref.offset mod vecsize));
          for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
            begin
              if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
                begin
                  sehlist.concat(cai_seh_directive.create_reg_offset(ash_savexmm,
                    newreg(R_MMREGISTER,saved_mm_registers[idx],R_SUBMMWHOLE),
                    saveref.offset+rsp_bias));
                  inc(saveref.offset,vecsize);
                end;
            end;
        end;

      if not had_unwind_info then
        sehlist.concat(cai_seh_directive.create(ash_endprologue));

      { place the directives after the recorded end-of-prologue marker when
        there is one, otherwise append them to the list being generated }
      if assigned(current_procinfo.endprologue_ai) then
        current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,sehlist)
      else
        list.concatlist(sehlist);

      sehlist.free;
    end;
  { Emits the procedure epilogue: releases the PIC register if PIC code is
    generated, removes the stack frame (unless nostackframe is set), emits
    the return instruction and, for procedures with unwind information,
    dumps the scopes and closes the SEH procedure. }
  procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
    var
      fpref : treference;
      is_exceptfilter : boolean;
    begin
      { Release PIC register }
      if cs_create_pic in current_settings.moduleswitches then
        list.concat(tai_regalloc.dealloc(NR_PIC_OFFSET_REG,nil));

      { Prevent return address from a possible call from ending up in the epilogue }
      { (restoring registers happens before epilogue, providing necessary padding) }
      if (pi_has_unwind_info in current_procinfo.flags) and
         (pi_do_call in current_procinfo.flags) and
         not (pi_has_saved_regs in current_procinfo.flags) then
        list.concat(Taicpu.op_none(A_NOP));

      { remove stackframe }
      if not nostackframe then
        begin
          is_exceptfilter:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);

          if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or is_exceptfilter then
            begin
              if current_procinfo.final_localsize<>0 then
                cg.a_op_const_reg(list,OP_ADD,OS_ADDR,current_procinfo.final_localsize,NR_STACK_POINTER_REG);
              if is_exceptfilter then
                list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
            end
          else
            begin
              if target_info.system=system_x86_64_win64 then
                begin
                  { Comply with Win64 unwinding mechanism, which only recognizes
                    'add $constant,%rsp' and 'lea offset(FPREG),%rsp' as belonging to
                    the function epilog.
                    Neither 'leave' nor even 'mov %FPREG,%rsp' are allowed. }
                  reference_reset_base(fpref,current_procinfo.framepointer,0,sizeof(pint));
                  list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],fpref,NR_STACK_POINTER_REG));
                  list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],current_procinfo.framepointer));
                end
              else
                list.concat(Taicpu.op_none(A_LEAVE,S_NO));
            end;

          list.concat(tai_regalloc.dealloc(NR_FRAME_POINTER_REG,nil));
        end;

      list.concat(Taicpu.Op_none(A_RET,S_NO));

      if pi_has_unwind_info in current_procinfo.flags then
        begin
          tx86_64procinfo(current_procinfo).dump_scopes(list);
          list.concat(cai_seh_directive.create(ash_endproc));
        end;
    end;
  { Emits an interface wrapper named labelname for procdef: the self value is
    adjusted by ioffset (via g_adjust_self_value) and control is transferred
    to the real method, either through the VMT for virtual methods or by a
    direct jump to the method's mangled name.

    Raises an internal error when procdef is not a plain function/procedure,
    is not a method of a structured type, carries one of the unsupported
    procedure options, or is not owned by an object symtable. }
  procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
    var
      export_label : boolean;
      vmtref : treference;
      targetref : treference;
      target : tasmsymbol;
      selfreg : tregister;
    begin
      if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
        Internalerror(200006137);
      if not assigned(procdef.struct) or
         (procdef.procoptions*[po_classmethod, po_staticmethod,
           po_methodpointer, po_interrupt, po_iocheck]<>[]) then
        Internalerror(200006138);
      if procdef.owner.symtabletype<>ObjectSymtable then
        Internalerror(200109191);

      { the wrapper label is global for programs/libraries, when
        smartlinking, or when the owning type lives in a global symtable }
      export_label:=(not current_module.is_unit) or
                    create_smartlink or
                    (procdef.owner.defowner.owner.symtabletype=globalsymtable);

      if export_label then
        List.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
      else
        List.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));

      { set param1 interface to self }
      g_adjust_self_value(list,procdef,ioffset);

      if (po_virtualmethod in procdef.procoptions) and
         not is_objectpascal_helper(procdef.struct) then
        begin
          if procdef.extnumber=$ffff then
            Internalerror(200006139);

          { load vmt from first parameter; win64 uses a different abi, so
            the first parameter lives in RCX there and in RDI elsewhere }
          if target_info.system=system_x86_64_win64 then
            selfreg:=NR_RCX
          else
            selfreg:=NR_RDI;
          reference_reset_base(vmtref,selfreg,0,sizeof(pint));
          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,vmtref,NR_RAX);

          { mov vmtoffs(%rax),%rax ; jmp *%rax }
          reference_reset_base(vmtref,NR_RAX,
            tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
          list.concat(taicpu.op_ref_reg(A_MOV,S_Q,vmtref,NR_RAX));
          list.concat(taicpu.op_reg(A_JMP,S_Q,NR_RAX));
        end
      else
        begin
          target:=current_asmdata.RefAsmSymbol(procdef.mangledname);
          reference_reset_symbol(targetref,target,0,sizeof(pint));
          { darwin/x86_64's assembler doesn't want @PLT after call symbols }
          if (cs_create_pic in current_settings.moduleswitches) and
             (target_info.system<>system_x86_64_darwin) then
            targetref.refaddr:=addr_pic
          else
            targetref.refaddr:=addr_full;
          list.concat(taicpu.op_ref(A_JMP,S_NO,targetref));
        end;

      List.concat(Tai_symbol_end.Createname(labelname));
    end;
  { Emits the code for a local unwind to label l.

    On every target except x86_64-win64 the inherited implementation is used.
    On win64 the current stack pointer is passed as the first and the address
    of l as the second integer parameter of the system procedure
    '_fpc_local_unwind', which is then called through its assembler name
    '_FPC_local_unwind'.

    list - assembler list that receives both the parameter setup and the call
    l    - label whose address is passed as the second argument

    NOTE(review): the two spellings of the helper name are kept as found;
    presumably the first is a symbol-table lookup and the second the public
    assembler alias - confirm against the system unit. }
  procedure tcgx86_64.g_local_unwind(list: TAsmList; l: TAsmLabel);
    var
      para1,para2: tcgpara;
      href: treference;
      pd: tprocdef;
    begin
      if (target_info.system<>system_x86_64_win64) then
        begin
          inherited g_local_unwind(list,l);
          exit;
        end;
      pd:=search_system_proc('_fpc_local_unwind');
      para1.init;
      para2.init;
      paramanager.getintparaloc(pd,1,para1);
      paramanager.getintparaloc(pd,2,para2);
      reference_reset_symbol(href,l,0,1);
      { TODO: using RSP is correct only while the stack is fixed!!
        (true now, but will change if/when allocating from stack is implemented) }
      a_load_reg_cgpara(list,OS_ADDR,NR_STACK_POINTER_REG,para1);
      a_loadaddr_ref_cgpara(list,href,para2);
      paramanager.freecgpara(list,para2);
      paramanager.freecgpara(list,para1);
      { emit the call into the same list as the parameter loads above; using
        current_asmdata.CurrAsmList here would separate the call from its
        argument setup whenever the caller passes a different list }
      g_call(list,'_FPC_local_unwind');
      para2.done;
      para1.done;
    end;
  { Copies the raw bits of integer register intreg into MM register mmreg
    using MOVD (32 bit source) or MOVQ (64 bit source).

    Raises an internal error when the source and destination sizes differ,
    when tosize is not one of OS_F32/OS_F64/OS_M64, when fromsize is not a
    32 or 64 bit integer size, or when a non-scalar shuffle is supplied. }
  procedure tcgx86_64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    var
      movop : tasmop;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(tosize in [OS_F32,OS_F64,OS_M64]) or
         (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
        internalerror(2009112505);

      { the width of the integer side selects the instruction }
      if fromsize in [OS_32,OS_S32] then
        movop:=A_MOVD
      else if fromsize in [OS_64,OS_S64] then
        movop:=A_MOVQ
      else
        internalerror(2009112506);

      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112517);

      list.concat(taicpu.op_reg_reg(movop,S_NO,intreg,mmreg));
    end;
  { Copies the raw bits of MM register mmreg into integer register intreg
    using MOVD (32 bit destination) or MOVQ (64 bit destination).

    Raises an internal error when the source and destination sizes differ,
    when fromsize is not one of OS_F32/OS_F64/OS_M64, when tosize is not a
    32 or 64 bit integer size, or when a non-scalar shuffle is supplied. }
  procedure tcgx86_64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
    var
      movop : tasmop;
    begin
      { this code can only be used to transfer raw data, not to perform
        conversions }
      if not(fromsize in [OS_F32,OS_F64,OS_M64]) or
         (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
        internalerror(2009112507);

      { the width of the integer side selects the instruction }
      if tosize in [OS_32,OS_S32] then
        movop:=A_MOVD
      else if tosize in [OS_64,OS_S64] then
        movop:=A_MOVQ
      else
        internalerror(2009112408);

      if assigned(shuffle) then
        if not shufflescalar(shuffle) then
          internalerror(2009112515);

      list.concat(taicpu.op_reg_reg(movop,S_NO,mmreg,intreg));
    end;
  { Installs the code generators for this target: the global cg becomes a
    tcgx86_64 instance and cg128 a tcg128 instance. }
  procedure create_codegen;
    begin
      cg:=tcgx86_64.create;
      cg128:=tcg128.create;
    end;
  324. end.