cgcpu.pas 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. This unit implements the code generator for the i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cgbase,cgobj,cg64f32,cgx86,
  23. aasmbase,aasmtai,aasmdata,aasmcpu,
  24. cpubase,parabase,cgutils,
  25. symconst,symdef,symsym
  26. ;
  27. type
  { Code generator for the i386 target, derived from the common x86
    code generator tcgx86. }
  tcg386 = class(tcgx86)
    { register allocator set-up and the register allocation pass }
    procedure init_register_allocators; override;
    procedure do_register_allocation(list: TAsmList; headertai: tai); override;

    { passing parameter using push instead of mov }
    procedure a_load_reg_cgpara(list: TAsmList; size: tcgsize; r: tregister; const cgpara: tcgpara); override;
    procedure a_load_const_cgpara(list: TAsmList; size: tcgsize; a: tcgint; const cgpara: tcgpara); override;
    procedure a_load_ref_cgpara(list: TAsmList; size: tcgsize; const r: treference; const cgpara: tcgpara); override;
    procedure a_loadaddr_ref_cgpara(list: TAsmList; const r: treference; const cgpara: tcgpara); override;

    { exit sequence of a routine }
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean); override;

    { copying (and releasing) open array value parameters }
    procedure g_copyvaluepara_openarray(list: TAsmList; const ref: treference; const lenloc: tlocation; elesize: tcgint; destreg: tregister);
    procedure g_releasevaluepara_openarray(list: TAsmList; const l: tlocation);

    { interface wrapper generation and GOT register initialisation }
    procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint); override;
    procedure g_maybe_got_init(list: TAsmList); override;
  end;
  { 64 bit operations for the i386: every operation works on a register
    pair or on two consecutive 32 bit memory words. }
  tcg64f386 = class(tcg64f32)
    procedure a_op64_ref_reg(list: TAsmList; op: TOpCG; size: tcgsize; const ref: treference; reg: tregister64); override;
    procedure a_op64_reg_reg(list: TAsmList; op: TOpCG; size: tcgsize; regsrc, regdst: tregister64); override;
    procedure a_op64_const_reg(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; reg: tregister64); override;
    procedure a_op64_const_ref(list: TAsmList; op: TOpCG; size: tcgsize; value: int64; const ref: treference); override;
  private
    { returns in op1/op2 the opcodes to use for the low and the high half }
    procedure get_64bit_ops(op: TOpCG; var op1, op2: TAsmOp);
  end;
  50. procedure create_codegen;
  51. implementation
  52. uses
  53. globals,verbose,systems,cutils,
  54. paramgr,procinfo,fmodule,
  55. rgcpu,rgx86,cpuinfo;
  { Returns true when the parameter described by cgpara has to be passed
    with a PUSH: the parameter manager does not use a fixed stack and the
    first location of the parameter is a memory reference whose index is
    the stack pointer. }
  function use_push(const cgpara:tcgpara):boolean;
    begin
      result:=false;
      if paramanager.use_fixed_stack then
        exit;
      if not assigned(cgpara.location) then
        exit;
      result:=(cgpara.location^.loc=LOC_REFERENCE) and
              (cgpara.location^.reference.index=NR_STACK_POINTER_REG);
    end;
  { Creates the register allocators for the integer, MMX and MM register
    types and the fpu register allocator. }
  procedure tcg386.init_register_allocators;
    begin
      inherited init_register_allocators;

      { integer registers: three different sets are possible }
      if (cs_create_pic in current_settings.moduleswitches) and
         not(target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
        { PIC code on non-darwin targets: EBX is not handed out }
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_EAX,RS_EDX,RS_ECX,RS_ESI,RS_EDI],
          first_int_imreg,[RS_EBP])
      else if (cs_useebp in current_settings.optimizerswitches) and
              assigned(current_procinfo) and
              (current_procinfo.framepointer<>NR_EBP) then
        { EBP is not the frame pointer, so it can be allocated as well }
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI,RS_EBP],
          first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
          [RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],
          first_int_imreg,[RS_EBP]);

      rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,
        [RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],
        first_mm_imreg,[]);
      rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,
        [RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],
        first_mm_imreg,[]);
      rgfpu:=Trgx86fpu.create;
    end;
  { Runs the inherited register allocation. Before doing so, the GOT
    register of a routine that needs the GOT is recorded as used in the
    routine, provided it is not an imaginary register. }
  procedure tcg386.do_register_allocation(list:TAsmList;headertai:tai);
    var
      gotsupreg : tsuperregister;
    begin
      if pi_needs_got in current_procinfo.flags then
        begin
          gotsupreg:=getsupreg(current_procinfo.got);
          if gotsupreg<first_int_imreg then
            include(rg[R_INTREGISTER].used_in_proc,gotsupreg);
        end;
      inherited do_register_allocation(list,headertai);
    end;
  { Passes register r as parameter cgpara. If the parameter has to be
    pushed (see use_push) a PUSH is generated, otherwise the inherited
    implementation is used. }
  procedure tcg386.a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);
    var
      slotsize : tcgsize;
    begin
      check_register_size(size,r);
      if not use_push(cgpara) then
        begin
          inherited a_load_reg_cgpara(list,size,r,cgpara);
          exit;
        end;

      cgpara.check_simple_location;
      { the pushed size is the size of the location, unless the alignment
        of the parameter is at least as big: then the alignment decides }
      if tcgsize2size[cgpara.location^.size]>cgpara.alignment then
        slotsize:=cgpara.location^.size
      else
        slotsize:=int_cgsize(cgpara.alignment);
      list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[slotsize],makeregsize(list,r,slotsize)));
    end;
  { Passes constant a as parameter cgpara. If the parameter has to be
    pushed (see use_push) a PUSH of the constant is generated, otherwise
    the inherited implementation is used. }
  procedure tcg386.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const cgpara : tcgpara);
    var
      slotsize : tcgsize;
    begin
      if not use_push(cgpara) then
        begin
          inherited a_load_const_cgpara(list,size,a,cgpara);
          exit;
        end;

      cgpara.check_simple_location;
      { the pushed size is the size of the location, unless the alignment
        of the parameter is at least as big: then the alignment decides }
      if tcgsize2size[cgpara.location^.size]>cgpara.alignment then
        slotsize:=cgpara.location^.size
      else
        slotsize:=int_cgsize(cgpara.alignment);
      list.concat(taicpu.op_const(A_PUSH,tcgsize2opsize[slotsize],a));
    end;
  { Passes the memory contents at r as parameter cgpara. When the parameter
    has to be pushed (see use_push) the data is either copied to freshly
    allocated stack space (size OS_NO) or pushed, otherwise the inherited
    implementation is used. }
  procedure tcg386.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : tcgpara);

    { Pushes the data belonging to paraloc, which is found at offset ofs
      from r, after the data of all locations that follow paraloc has
      been pushed. }
    procedure pushdata(paraloc:pcgparalocation;ofs:tcgint);
      var
        locbytes : tcgint;
        pushsize : tcgsize;
        opsize : topsize;
        valuereg : tregister;
        srcref : treference;
      begin
        if not assigned(paraloc) then
          exit;
        locbytes:=tcgsize2size[paraloc^.size];
        if (paraloc^.loc<>LOC_REFERENCE) or
           (paraloc^.reference.index<>NR_STACK_POINTER_REG) or
           (locbytes>sizeof(aint)) then
          internalerror(200501162);

        { Pushes are needed in reverse order, add the size of the
          current location to the offset where to load from. This
          prevents wrong calculations for the last location when
          the size is not a power of 2 }
        if assigned(paraloc^.next) then
          pushdata(paraloc^.next,ofs+locbytes);

        { the data of this location starts at ofs }
        srcref:=r;
        inc(srcref.offset,ofs);

        if locbytes>cgpara.alignment then
          pushsize:=paraloc^.size
        else
          pushsize:=int_cgsize(cgpara.alignment);
        opsize:=TCgsize2opsize[pushsize];
        { for go32v2 we obtain OS_F32,
          but pushs is not valid, we need pushl }
        if opsize=S_FS then
          opsize:=S_L;

        if locbytes<cgpara.alignment then
          begin
            { the location is smaller than the stack slot: load it into a
              register of the slot size and push that register }
            valuereg:=getintregister(list,pushsize);
            a_load_ref_reg(list,paraloc^.size,pushsize,srcref,valuereg);
            list.concat(taicpu.op_reg(A_PUSH,opsize,valuereg));
          end
        else
          begin
            make_simple_ref(list,srcref);
            list.concat(taicpu.op_ref(A_PUSH,opsize,srcref));
          end;
      end;

    var
      copylen : tcgint;
      workref : treference;
    begin
      if not use_push(cgpara) then
        begin
          inherited a_load_ref_cgpara(list,size,r,cgpara);
          exit;
        end;

      { cgpara.size=OS_NO requires a copy on the stack }
      if (cgpara.size=OS_NO) or (size=OS_NO) then
        begin
          cgpara.check_simple_location;
          copylen:=align(cgpara.intsize,cgpara.alignment);
          g_stackpointer_alloc(list,copylen);
          reference_reset_base(workref,NR_STACK_POINTER_REG,0,4);
          g_concatcopy(list,r,workref,copylen);
          exit;
        end;

      if tcgsize2size[cgpara.size]<>tcgsize2size[size] then
        internalerror(200501161);

      if cgpara.size=OS_F64 then
        begin
          { two 32 bit pushes: first the half at offset 4, then the half
            at offset 0 }
          workref:=r;
          make_simple_ref(list,workref);
          inc(workref.offset,4);
          list.concat(taicpu.op_ref(A_PUSH,S_L,workref));
          dec(workref.offset,4);
          list.concat(taicpu.op_ref(A_PUSH,S_L,workref));
        end
      else
        { the data has to be pushed in reverse order, which pushdata
          achieves by recursing over the location list }
        pushdata(cgpara.location,0);
    end;
  { Passes the address of r as parameter cgpara. When the parameter has to
    be pushed (see use_push), the cheapest PUSH form for the shape of the
    reference is selected; otherwise the inherited implementation is
    used. }
  procedure tcg386.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);
    var
      opsize : topsize;
      picref : treference;

    { generic case: load the address into a register and push the register }
    procedure push_address_via_register;
      var
        addrreg : tregister;
      begin
        addrreg:=getaddressregister(list);
        a_loadaddr_ref_reg(list,r,addrreg);
        list.concat(taicpu.op_reg(A_PUSH,opsize,addrreg));
      end;

    begin
      if not use_push(cgpara) then
        begin
          inherited a_loadaddr_ref_cgpara(list,r,cgpara);
          exit;
        end;

      cgpara.check_simple_location;
      opsize:=tcgsize2opsize[OS_ADDR];

      if (r.segment=NR_NO) and (r.base=NR_NO) and (r.index=NR_NO) then
        begin
          { no registers involved: symbol and/or constant offset }
          if not assigned(r.symbol) then
            list.concat(Taicpu.Op_const(A_PUSH,opsize,r.offset))
          else if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
                  ((r.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                   (cs_create_pic in current_settings.moduleswitches)) then
            push_address_via_register
          else if cs_create_pic in current_settings.moduleswitches then
            begin
              if r.offset<>0 then
                push_address_via_register
              else
                begin
                  { push the contents of the PIC reference to the symbol,
                    addressed relative to the GOT register }
                  reference_reset_symbol(picref,r.symbol,0,r.alignment);
                  picref.refaddr:=addr_pic;
                  picref.base:=current_procinfo.got;
{$ifdef EXTDEBUG}
                  if not (pi_needs_got in current_procinfo.flags) then
                    Comment(V_warning,'pi_needs_got not included');
{$endif EXTDEBUG}
                  include(current_procinfo.flags,pi_needs_got);
                  list.concat(taicpu.op_ref(A_PUSH,S_L,picref));
                end;
            end
          else
            list.concat(Taicpu.Op_sym_ofs(A_PUSH,opsize,r.symbol,r.offset));
        end
      else if (r.segment=NR_NO) and (r.base=NR_NO) and (r.index<>NR_NO) and
              (r.offset=0) and (r.scalefactor=0) and (r.symbol=nil) then
        { the address is just the index register }
        list.concat(Taicpu.Op_reg(A_PUSH,opsize,r.index))
      else if (r.segment=NR_NO) and (r.base<>NR_NO) and (r.index=NR_NO) and
              (r.offset=0) and (r.symbol=nil) then
        { the address is just the base register }
        list.concat(Taicpu.Op_reg(A_PUSH,opsize,r.base))
      else
        push_address_via_register;
    end;
  { Generates the exit code of the current routine: EMMS when MMX registers
    were used, removal of the stack frame (unless nostackframe is set) and
    the return instruction that fits the kind of routine. parasize is the
    number of parameter bytes the routine removes itself on return. }
  procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);

    { adds a to the stack pointer }
    procedure increase_sp(a : tcgint);
      var
        spref : treference;
      begin
        reference_reset_base(spref,NR_STACK_POINTER_REG,a,0);
        { normally, lea is a better choice than an add }
        list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],spref,NR_STACK_POINTER_REG));
      end;

    { releases the locals, restores the saved registers and the frame pointer }
    procedure remove_stackframe;
      var
        isexceptfilter : boolean;
      begin
        isexceptfilter:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);
        if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or isexceptfilter then
          begin
            if current_procinfo.final_localsize<>0 then
              increase_sp(current_procinfo.final_localsize);
            if not paramanager.use_fixed_stack then
              internal_restore_regs(list,true);
            if isexceptfilter then
              list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
          end
        else
          begin
            if not paramanager.use_fixed_stack then
              internal_restore_regs(list,not (pi_has_stack_allocs in current_procinfo.flags));
            list.concat(Taicpu.op_none(A_LEAVE,S_NO));
          end;
        list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
      end;

    { return sequence of an interrupt routine: pops the general purpose
      and segment registers and returns with IRET. The stack slot of a
      register that carries the function result is skipped rather than
      popped. }
    procedure interrupt_return;
      var
        retloc : pcgparalocation;
      begin
        retloc:=current_procinfo.procdef.funcretloc[calleeside].location;

        if assigned(retloc) and (retloc^.loc=LOC_REGISTER) then
          begin
            if getsupreg(retloc^.register)=RS_EAX then
              list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
            else
              internalerror(2010053001);
          end
        else
          list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EAX));

        list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EBX));
        list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ECX));

        if (current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64]) and
           assigned(retloc) and
           assigned(retloc^.next) and
           (retloc^.next^.loc=LOC_REGISTER) then
          begin
            if getsupreg(retloc^.next^.register)=RS_EDX then
              list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
            else
              internalerror(2010053002);
          end
        else
          list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));

        list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ESI));
        list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDI));
        { .... also the segment registers }
        list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DS));
        list.concat(Taicpu.Op_reg(A_POP,S_W,NR_ES));
        list.concat(Taicpu.Op_reg(A_POP,S_W,NR_FS));
        list.concat(Taicpu.Op_reg(A_POP,S_W,NR_GS));
        { this restores the flags }
        list.concat(Taicpu.Op_none(A_IRET,S_NO));
      end;

    begin
      { MMX needs to call EMMS }
      if assigned(rg[R_MMXREGISTER]) and
         rg[R_MMXREGISTER].uses_registers then
        list.concat(Taicpu.op_none(A_EMMS,S_NO));

      if not nostackframe then
        remove_stackframe;

      { return from proc }
      if (po_interrupt in current_procinfo.procdef.procoptions) and
         { this messes up stack alignment }
         (target_info.stackalign=4) then
        interrupt_return
      { Routines with the poclearstack flag set use only a ret }
      else if (current_procinfo.procdef.proccalloption in clearstack_pocalls) and
              (not paramanager.use_fixed_stack) then
        begin
          { complex return values are removed from stack in C code PM }
          { but not on win32 }
          { and not for safecall with hidden exceptions, because the result }
          { which contains the exception is passed in EAX }
          if ((target_info.system <> system_i386_win32) or
              (target_info.abi=abi_old_win32_gnu)) and
             not ((current_procinfo.procdef.proccalloption = pocall_safecall) and
                  (tf_safecall_exceptions in target_info.flags)) and
             paramanager.ret_in_param(current_procinfo.procdef.returndef,
                                      current_procinfo.procdef) then
            list.concat(Taicpu.Op_const(A_RET,S_W,sizeof(aint)))
          else
            list.concat(Taicpu.Op_none(A_RET,S_NO));
        end
      { ... also routines with parasize=0 }
      else if parasize=0 then
        list.concat(Taicpu.Op_none(A_RET,S_NO))
      else
        begin
          { parameters are limited to 65535 bytes because ret allows only imm16 }
          if parasize>65535 then
            CGMessage(cg_e_parasize_too_big);
          list.concat(Taicpu.Op_const(A_RET,S_W,parasize));
        end;
    end;
  { Generates code that copies an open array value parameter to the stack.

    list    : assembler list the code is appended to
    ref     : reference to the source array
    lenloc  : location holding the "high" value of the array (the element
              count is high+1)
    elesize : size of one array element in bytes
    destreg : register that receives the address of the copy

    The generated code computes (high+1)*elesize, lowers ESP by that amount
    (touching every stack page on win32), aligns ESP down to a multiple of
    4, copies the data with REP MOVSx and finally loads ESP into destreg.
    EDI, ECX and ESI are used as fixed registers; pi_has_stack_allocs is
    added to the flags of the current routine. }
  procedure tcg386.g_copyvaluepara_openarray(list : TAsmList;const ref:treference;const lenloc:tlocation;elesize:tcgint;destreg:tregister);
    var
      power : longint;
      opsize : topsize;
{$ifndef __NOWINPECOFF__}
      again,ok : tasmlabel;
{$endif}
    begin
      { get stack space }
      getcpuregister(list,NR_EDI);
      a_load_loc_reg(list,OS_INT,lenloc,NR_EDI);
      list.concat(Taicpu.op_reg(A_INC,S_L,NR_EDI));
      { Now EDI contains (high+1). }

      { special case handling for elesize=8, 4 and 2:
        set ECX = (high+1) instead of ECX = (high+1)*elesize.

        In the case of elesize=4 and 2, this allows us to avoid the SHR later.
        In the case of elesize=8, we can later use a SHL ECX, 1 instead of
        SHR ECX, 2 which is one byte shorter. }
      if (elesize=8) or (elesize=4) or (elesize=2) then
        begin
          { Now EDI contains (high+1). Copy it to ECX for later use. }
          getcpuregister(list,NR_ECX);
          list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
        end;

      { EDI := EDI * elesize }
      if (elesize<>1) then
        begin
          if ispowerof2(elesize, power) then
            list.concat(Taicpu.op_const_reg(A_SHL,S_L,power,NR_EDI))
          else
            list.concat(Taicpu.op_const_reg(A_IMUL,S_L,elesize,NR_EDI));
        end;

      if (elesize<>8) and (elesize<>4) and (elesize<>2) then
        begin
          { Now EDI contains (high+1)*elesize. Copy it to ECX for later use. }
          getcpuregister(list,NR_ECX);
          list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
        end;

{$ifndef __NOWINPECOFF__}
      { windows guards only a few pages for stack growing, }
      { so we have to access every page first              }
      if target_info.system=system_i386_win32 then
        begin
          current_asmdata.getjumplabel(again);
          current_asmdata.getjumplabel(ok);
          a_label(list,again);
          list.concat(Taicpu.op_const_reg(A_CMP,S_L,winstackpagesize,NR_EDI));
          a_jmp_cond(list,OC_B,ok);
          { lower ESP by one page in total (SUB plus the 4 bytes of the
            PUSH); the PUSH is the access that touches the page }
          list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP));
          list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
          list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize,NR_EDI));
          a_jmp_always(list,again);
          a_label(list,ok);
        end;
{$endif __NOWINPECOFF__}

      { If we were probing pages, EDI=(size mod pagesize) and ESP is decremented
        by (size div pagesize)*pagesize, otherwise EDI=size.
        Either way, subtracting EDI from ESP will set ESP to desired final value. }
      list.concat(Taicpu.op_reg_reg(A_SUB,S_L,NR_EDI,NR_ESP));

      { align stack on 4 bytes: only the two lowest bits have to be cleared.
        The mask used to be $fffffff4, which additionally cleared bit 3 and
        so could lower ESP by 8 more bytes than the alignment requires. }
      list.concat(Taicpu.op_const_reg(A_AND,S_L,aint($fffffffc),NR_ESP));

      { load destination, don't use a_load_reg_reg, that will add a move instruction
        that can confuse the reg allocator }
      list.concat(Taicpu.Op_reg_reg(A_MOV,S_L,NR_ESP,NR_EDI));

      { Allocate ESI and load it with source }
      getcpuregister(list,NR_ESI);
      a_loadaddr_ref_reg(list,ref,NR_ESI);

      { calculate size: select the widest move the element size allows and
        convert ECX to the matching unit count }
      opsize:=S_B;
      if elesize=8 then
        begin
          opsize:=S_L;
          { ECX is number of qwords, convert to dwords }
          list.concat(Taicpu.op_const_reg(A_SHL,S_L,1,NR_ECX))
        end
      else if elesize=4 then
        begin
          opsize:=S_L;
          { ECX is already number of dwords, so no need to SHL/SHR }
        end
      else if elesize=2 then
        begin
          opsize:=S_W;
          { ECX is already number of words, so no need to SHL/SHR }
        end
      else if (elesize and 3)=0 then
        begin
          opsize:=S_L;
          { ECX is number of bytes, convert to dwords }
          list.concat(Taicpu.op_const_reg(A_SHR,S_L,2,NR_ECX))
        end
      else if (elesize and 1)=0 then
        begin
          opsize:=S_W;
          { ECX is number of bytes, convert to words }
          list.concat(Taicpu.op_const_reg(A_SHR,S_L,1,NR_ECX))
        end;

      if ts_cld in current_settings.targetswitches then
        list.concat(Taicpu.op_none(A_CLD,S_NO));
      list.concat(Taicpu.op_none(A_REP,S_NO));
      case opsize of
        S_B : list.concat(Taicpu.Op_none(A_MOVSB,S_NO));
        S_W : list.concat(Taicpu.Op_none(A_MOVSW,S_NO));
        S_L : list.concat(Taicpu.Op_none(A_MOVSD,S_NO));
      end;
      ungetcpuregister(list,NR_EDI);
      ungetcpuregister(list,NR_ECX);
      ungetcpuregister(list,NR_ESI);

      { patch the new address, but don't use a_load_reg_reg, that will add a move instruction
        that can confuse the reg allocator }
      list.concat(Taicpu.Op_reg_reg(A_MOV,S_L,NR_ESP,destreg));
      include(current_procinfo.flags,pi_has_stack_allocs);
    end;
  { Counterpart of g_copyvaluepara_openarray; generates no code. }
  procedure tcg386.g_releasevaluepara_openarray(list : TAsmList;const l:tlocation);
    begin
      { Nothing to release }
    end;
  { Generates the code that loads the GOT/PIC register. Nothing is emitted
    unless PIC code is created, the target uses a GOT for PIC and the
    current routine needs the GOT. }
  procedure tcg386.g_maybe_got_init(list: TAsmList);
    var
      got_in_ebx : boolean;
    begin
      if not((cs_create_pic in current_settings.moduleswitches) and
             (tf_pic_uses_got in target_info.flags) and
             (pi_needs_got in current_procinfo.flags)) then
        exit;

      { on darwin, the got register is virtual (and allocated earlier
        already); on all other targets EBX is used }
      got_in_ebx:=not(target_info.system in [system_i386_darwin,system_i386_iphonesim]);

      if got_in_ebx then
        begin
          { ecx could be used in leaf procedures that don't use ecx to pass
            a parameter }
          current_procinfo.got:=NR_EBX;
          { an additional check on current_settings.optimizecputype in
            [cpu_Pentium2,cpu_Pentium3,cpu_Pentium4] needs testing before
            it can be enabled for non-darwin platforms }
          current_module.requires_ebx_pic_helper:=true;
          a_call_name_static(list,'fpc_geteipasebx');
          list.concat(taicpu.op_sym_ofs_reg(A_ADD,S_L,current_asmdata.RefAsmSymbol('_GLOBAL_OFFSET_TABLE_'),0,NR_PIC_OFFSET_REG));
          list.concat(tai_regalloc.alloc(NR_PIC_OFFSET_REG,nil));
        end
      else
        begin
          { call/pop is faster than call/ret/mov on Core Solo and later
            according to Apple's benchmarking -- and all Intel Macs
            have at least a Core Solo (furthermore, the i386 - Pentium 1
            don't have a return stack buffer) }
          a_call_name_static(list,current_procinfo.CurrGOTLabel.name);
          a_label(list,current_procinfo.CurrGotLabel);
          list.concat(taicpu.op_reg(A_POP,S_L,current_procinfo.got));
        end;
    end;
  { Generates the wrapper through which an interface method is called: the
    wrapper corrects the self parameter by ioffset (g_adjust_self_value)
    and then jumps to the implementing method.

    list      : assembler list the wrapper is appended to
    procdef   : the implementing method
    labelname : symbol name of the wrapper
    ioffset   : adjustment passed on to g_adjust_self_value }
  procedure tcg386.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
  {
    possible calling conventions:
                  default stdcall cdecl pascal register
    default(0):      OK     OK     OK    OK     OK
    virtual(1):      OK     OK     OK    OK     OK(2 or 1)

    (0):
        set self parameter to correct value
        jmp mangledname

    (1): The wrapper code use %ecx to reach the virtual method address
         set self to correct value
         move self,%eax
         mov  0(%eax),%ecx ; load vmt
         jmp  vmtoffs(%ecx) ; method offs

    (2): Virtual use values pushed on stack to reach the method address
         so the following code be generated:
         set self to correct value
         push %ebx ; allocate space for function address
         push %eax
         mov  self,%eax
         mov  0(%eax),%eax ; load vmt
         mov  vmtoffs(%eax),eax ; method offs
         mov  %eax,4(%esp)
         pop  %eax
         ret  0; jmp the address
  }

    { returns whether ECX is used (either as a parameter or is nonvolatile and shouldn't be changed) }
    function is_ecx_used: boolean;
      var
        i: Integer;
        hp: tparavarsym;
        paraloc: PCGParaLocation;
      begin
        if not (RS_ECX in paramanager.get_volatile_registers_int(procdef.proccalloption)) then
          exit(true);
        for i:=0 to procdef.paras.count-1 do
          begin
            hp:=tparavarsym(procdef.paras[i]);
            procdef.init_paraloc_info(calleeside);
            { walk all locations of the parameter, looking for ECX }
            paraloc:=hp.paraloc[calleeside].Location;
            while paraloc<>nil do
              begin
                if (paraloc^.Loc=LOC_REGISTER) and (getsupreg(paraloc^.register)=RS_ECX) then
                  exit(true);
                paraloc:=paraloc^.Next;
              end;
          end;
        Result:=false;
      end;

    { loads self from the stack into EAX; offs is added to the stack
      offset of self. Nothing is generated for the register calling
      convention. }
    procedure getselftoeax(offs: longint);
      var
        href : treference;
        selfoffsetfromsp : longint;
      begin
        { mov offset(%esp),%eax }
        if (procdef.proccalloption<>pocall_register) then
          begin
            { framepointer is pushed for nested procs }
            if procdef.parast.symtablelevel>normal_function_level then
              selfoffsetfromsp:=2*sizeof(aint)
            else
              selfoffsetfromsp:=sizeof(aint);
            reference_reset_base(href,NR_ESP,selfoffsetfromsp+offs,4);
            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
          end;
      end;

    { loads the vmt pointer found at 0(%eax) into reg }
    procedure loadvmtto(reg: tregister);
      var
        href : treference;
      begin
        { mov  0(%eax),%reg ; load vmt}
        reference_reset_base(href,NR_EAX,0,4);
        a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,reg);
      end;

    { applies op (call/jmp) to the vmt entry of procdef, with the vmt
      pointer being in reg }
    procedure op_onregmethodaddr(op: TAsmOp; reg: tregister);
      var
        href : treference;
      begin
        if (procdef.extnumber=$ffff) then
          Internalerror(200006139);
        { call/jmp  vmtoffs(%reg) ; method offs }
        reference_reset_base(href,reg,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
        list.concat(taicpu.op_ref(op,S_L,href));
      end;

    { replaces the vmt pointer in EAX by the address stored in the vmt
      entry of procdef }
    procedure loadmethodoffstoeax;
      var
        href : treference;
      begin
        if (procdef.extnumber=$ffff) then
          Internalerror(200006139);
        { mov vmtoffs(%eax),%eax ; method offs }
        reference_reset_base(href,NR_EAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
        a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
      end;

    var
      lab : tasmsymbol;
      make_global : boolean;
      href : treference;
    begin
      { sanity checks on the kind of routine a wrapper is generated for }
      if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
        Internalerror(200006137);
      if not assigned(procdef.struct) or
         (procdef.procoptions*[po_classmethod, po_staticmethod,
           po_methodpointer, po_interrupt, po_iocheck]<>[]) then
        Internalerror(200006138);
      if procdef.owner.symtabletype<>ObjectSymtable then
        Internalerror(200109191);

      make_global:=false;
      if (not current_module.is_unit) or
         create_smartlink or
         (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
        make_global:=true;

      if make_global then
        List.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
      else
        List.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));

      { set param1 interface to self  }
      g_adjust_self_value(list,procdef,ioffset);

      if (po_virtualmethod in procdef.procoptions) and
         not is_objectpascal_helper(procdef.struct) then
        begin
          if (procdef.proccalloption=pocall_register) and is_ecx_used then
            begin
              { case 2 }
              list.concat(taicpu.op_reg(A_PUSH,S_L,NR_EBX)); { allocate space for address}
              list.concat(taicpu.op_reg(A_PUSH,S_L,NR_EAX));
              getselftoeax(8);
              loadvmtto(NR_EAX);
              loadmethodoffstoeax;
              { mov %eax,4(%esp) }
              reference_reset_base(href,NR_ESP,4,4);
              list.concat(taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
              { pop  %eax }
              list.concat(taicpu.op_reg(A_POP,S_L,NR_EAX));
              { ret  ; jump to the address }
              list.concat(taicpu.op_none(A_RET,S_L));
            end
          else
            begin
              { case 1 }
              getselftoeax(0);
              loadvmtto(NR_ECX);
              op_onregmethodaddr(A_JMP,NR_ECX);
            end;
        end
      { case 0 }
      else
        begin
          { The iphone simulator is treated as a darwin target in the other
            methods of this class as well, so it also has to jump through
            the darwin call stub; previously only system_i386_darwin was
            tested here. }
          if not(target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
            begin
              lab:=current_asmdata.RefAsmSymbol(procdef.mangledname);
              list.concat(taicpu.op_sym(A_JMP,S_NO,lab))
            end
          else
            list.concat(taicpu.op_sym(A_JMP,S_NO,get_darwin_call_stub(procdef.mangledname,false)))
        end;

      List.concat(Tai_symbol_end.Createname(labelname));
    end;
  690. { ************* 64bit operations ************ }
  { Translates a generic 64 bit operation into the opcode for the low
    32 bits (op1) and the opcode for the high 32 bits (op2). Operations
    other than ADD, SUB, XOR, OR and AND cause an internal error. }
  procedure tcg64f386.get_64bit_ops(op:TOpCG;var op1,op2:TAsmOp);
    begin
      case op of
        { addition and subtraction propagate the carry/borrow into the
          high half }
        OP_ADD : begin op1:=A_ADD; op2:=A_ADC; end;
        OP_SUB : begin op1:=A_SUB; op2:=A_SBB; end;
        { the logical operations treat both halves alike }
        OP_XOR : begin op1:=A_XOR; op2:=A_XOR; end;
        OP_OR  : begin op1:=A_OR;  op2:=A_OR;  end;
        OP_AND : begin op1:=A_AND; op2:=A_AND; end;
        else
          internalerror(200203241);
      end;
    end;
  { 64 bit operation with a memory source: reg := reg op ref. For the unary
    operations NEG and NOT the value at ref is loaded into reg first and
    the operation is then performed on the register pair. }
  procedure tcg64f386.a_op64_ref_reg(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference;reg : tregister64);
    var
      lowop,highop : TAsmOp;
      memref : treference;
    begin
      if op in [OP_NEG,OP_NOT] then
        begin
          a_load64_ref_reg(list,ref,reg);
          a_op64_reg_reg(list,op,size,reg,reg);
          exit;
        end;

      get_64bit_ops(op,lowop,highop);
      memref:=ref;
      tcgx86(cg).make_simple_ref(list,memref);
      { low half first, the high half is found 4 bytes further }
      list.concat(taicpu.op_ref_reg(lowop,S_L,memref,reg.reglo));
      inc(memref.offset,4);
      list.concat(taicpu.op_ref_reg(highop,S_L,memref,reg.reghi));
    end;
  { 64 bit operation on register pairs: regdst := regdst op regsrc, or
    regdst := op regsrc for the unary operations NEG and NOT. }
  procedure tcg64f386.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      lowop,highop : TAsmOp;
    begin
      if op in [OP_NEG,OP_NOT] then
        begin
          { unary operations work in place on the destination }
          if regsrc.reglo<>regdst.reglo then
            a_load64_reg_reg(list,regsrc,regdst);
          list.concat(taicpu.op_reg(A_NOT,S_L,regdst.reghi));
          if op=OP_NEG then
            begin
              { not hi; neg lo; sbb -1,hi }
              list.concat(taicpu.op_reg(A_NEG,S_L,regdst.reglo));
              list.concat(taicpu.op_const_reg(A_SBB,S_L,-1,regdst.reghi));
            end
          else
            list.concat(taicpu.op_reg(A_NOT,S_L,regdst.reglo));
          exit;
        end;

      get_64bit_ops(op,lowop,highop);
      list.concat(taicpu.op_reg_reg(lowop,S_L,regsrc.reglo,regdst.reglo));
      list.concat(taicpu.op_reg_reg(highop,S_L,regsrc.reghi,regdst.reghi));
    end;
  { 64 bit operation with a constant: reg := reg op value. Supports AND,
    OR, XOR, ADD and SUB; any other operation causes an internal error. }
  procedure tcg64f386.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    var
      lowop,highop : TAsmOp;
    begin
      if op in [OP_AND,OP_OR,OP_XOR] then
        begin
          { the halves are independent, so the 32 bit code generator can
            handle each of them }
          cg.a_op_const_reg(list,op,OS_32,tcgint(lo(value)),reg.reglo);
          cg.a_op_const_reg(list,op,OS_32,tcgint(hi(value)),reg.reghi);
        end
      else if op in [OP_ADD,OP_SUB] then
        begin
          // can't use a_op_const_ref because this may use dec/inc
          get_64bit_ops(op,lowop,highop);
          list.concat(taicpu.op_const_reg(lowop,S_L,aint(lo(value)),reg.reglo));
          list.concat(taicpu.op_const_reg(highop,S_L,aint(hi(value)),reg.reghi));
        end
      else
        internalerror(200204021);
    end;
  { 64 bit operation with a constant on memory: ref := ref op value.
    Supports AND, OR, XOR, ADD and SUB; any other operation causes an
    internal error. }
  procedure tcg64f386.a_op64_const_ref(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const ref : treference);
    var
      lowop,highop : TAsmOp;
      memref : treference;
    begin
      { work on a simplified copy of the reference; the high half is
        addressed by adding 4 to its offset }
      memref:=ref;
      tcgx86(cg).make_simple_ref(list,memref);

      if op in [OP_AND,OP_OR,OP_XOR] then
        begin
          cg.a_op_const_ref(list,op,OS_32,tcgint(lo(value)),memref);
          inc(memref.offset,4);
          cg.a_op_const_ref(list,op,OS_32,tcgint(hi(value)),memref);
        end
      else if op in [OP_ADD,OP_SUB] then
        begin
          get_64bit_ops(op,lowop,highop);
          // can't use a_op_const_ref because this may use dec/inc
          list.concat(taicpu.op_const_ref(lowop,S_L,aint(lo(value)),memref));
          inc(memref.offset,4);
          list.concat(taicpu.op_const_ref(highop,S_L,aint(hi(value)),memref));
        end
      else
        internalerror(200204022);
    end;
  { Instantiates the i386 code generators and stores them in the global
    variables cg and cg64. }
  procedure create_codegen;
    begin
      cg:=tcg386.create;
      cg64:=tcg64f386.create;
    end;
  822. end.