cgx86.pas 88 KB


  1. {
  2. Copyright (c) 1998-2005 by Florian Klaempfl
  3. This unit implements the common parts of the code generator for the i386 and the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. { This unit implements the common parts of the code generator for the i386 and the x86-64.
  18. }
  19. unit cgx86;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. globtype,
  24. cgbase,cgutils,cgobj,
  25. aasmbase,aasmtai,aasmdata,aasmcpu,
  26. cpubase,cpuinfo,rgobj,rgx86,rgcpu,
  27. symconst,symtype,symdef;
  28. type
  { Code generator class holding the parts shared by the x86 family back ends
    (the unit header names i386 and x86-64; the implementation also contains
    i8086-specific conditional code). }
  tcgx86 = class(tcg)
    { register allocator used for FPU registers (see getfpuregister,
      ungetcpuregister and inc_/dec_fpu_stack) }
    rgfpu : Trgx86fpu;

    { --- register allocation ------------------------------------------- }
    procedure done_register_allocators;override;
    function getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
    function getmmxregister(list:TAsmList):Tregister;
    function getmmregister(list:TAsmList;size:Tcgsize):Tregister;override;
    procedure getcpuregister(list:TAsmList;r:Tregister);override;
    procedure ungetcpuregister(list:TAsmList;r:Tregister);override;
    procedure alloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);override;
    procedure dealloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);override;
    function uses_registers(rt:Tregistertype):boolean;override;
    procedure add_reg_instruction(instr:Tai;r:tregister);override;
    { bookkeeping of rgfpu.fpuvaroffset, range-checked to 0..7 }
    procedure dec_fpu_stack;
    procedure inc_fpu_stack;

    { --- calls ---------------------------------------------------------- }
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_reg(list : TAsmList;reg : tregister);override;
    procedure a_call_ref(list : TAsmList;ref : treference);override;
    procedure a_call_name_static(list : TAsmList;const s : string);override;

    { --- integer operations --------------------------------------------- }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
    procedure a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;

    { --- move instructions ---------------------------------------------- }
    procedure a_load_const_reg(list : TAsmList; tosize: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_const_ref(list : TAsmList; tosize: tcgsize; a : tcgint;const ref : treference);override;
    procedure a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_ref_reg(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister);override;
    procedure a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);override;
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { --- bit scan instructions ------------------------------------------ }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;

    { --- fpu move instructions ------------------------------------------ }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

    { --- vector register move instructions ------------------------------ }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);override;

    { --- comparison operations and jumps -------------------------------- }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
      l : tasmlabel);override;
    procedure a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;const ref : treference;
      l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister; l : tasmlabel); override;
    procedure a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister; const ref: treference; l : tasmlabel); override;

    procedure a_jmp_name(list : TAsmList;const s : string);override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;

    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister); override;
    procedure g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference); override;

    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;

    { --- entry/exit code helpers ---------------------------------------- }
    procedure g_profilecode(list : TAsmList);override;
    procedure g_stackpointer_alloc(list : TAsmList;localsize : longint);override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);override;

    procedure g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string); override;

    { rewrites a reference in place into a form usable as an instruction
      operand on the current target; may append helper instructions to list }
    procedure make_simple_ref(list:TAsmList;var ref: treference);
  protected
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    procedure check_register_size(size:tcgsize;reg:tregister);

    procedure opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);

    function get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
  private
    procedure sizes2load(s1,s2 : tcgsize;var op: tasmop; var s3: topsize);

    procedure floatload(list: TAsmList; t : tcgsize;const ref : treference);
    procedure floatstore(list: TAsmList; t : tcgsize;const ref : treference);
    procedure floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
    procedure floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
  end;
  const
    { Maps every tcgsize value to the operand size used for it on the
      selected target.  Entries follow the declaration order of tcgsize,
      which is declared in another unit; the row grouping below is only
      a layout aid. }
  {$if defined(x86_64)}
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,
       S_B,S_W,S_L,S_Q,S_XMM,
       S_B,S_W,S_L,S_Q,S_XMM,
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
       S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
  {$elseif defined(i386)}
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,
       S_B,S_W,S_L,S_L,S_T,
       S_B,S_W,S_L,S_L,S_L,
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
       S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
  {$elseif defined(i8086)}
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,
       S_B,S_W,S_W,S_W,S_T,
       S_B,S_W,S_W,S_W,S_W,
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
       S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
  {$endif}

  {$ifndef NOTARGETWIN}
    { presumably the stack page size assumed for Windows targets, in bytes;
      its use is outside this part of the unit }
    winstackpagesize = 4096;
  {$endif NOTARGETWIN}
  127. function UseAVX: boolean;
  128. implementation
  129. uses
  130. globals,verbose,systems,cutils,
  131. defutil,paramgr,procinfo,
  132. tgobj,ncgutil,
  133. fmodule,symsym;
  { Returns true when the currently selected FPU type is fpu_avx. }
  function UseAVX: boolean;
    begin
      Result:=(current_settings.fputype=fpu_avx);
    end;
  const
    { assembler opcode for each generic operation; entries are in topcg
      declaration order }
    TOpCG2AsmOp: Array[topcg] of TAsmOp =
      (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,A_IDIV,
       A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,A_SAR,
       A_SHL,A_SHR,A_SUB,A_XOR,A_ROL,A_ROR);

    { condition code for each generic comparison; entries are in topcmp
      declaration order }
    TOpCmp2AsmCond: Array[topcmp] of TAsmCond =
      (C_NONE,C_E,C_G,C_L,C_GE,C_LE,
       C_NE,C_BE,C_B,C_AE,C_A);
  { Releases the integer, MM and MMX register allocators as well as the FPU
    allocator, then lets the ancestor finish its own clean-up. }
  procedure Tcgx86.done_register_allocators;
    begin
      { allocators kept in the inherited rg[] table }
      rg[R_INTREGISTER].free;
      rg[R_MMREGISTER].free;
      rg[R_MMXREGISTER].free;
      { allocator owned by this class }
      rgfpu.free;

      inherited done_register_allocators;
    end;
  { Obtains an FPU register from rgfpu.  The size argument is not used by
    this implementation. }
  function Tcgx86.getfpuregister(list:TAsmList;size:Tcgsize):Tregister;
    begin
      getfpuregister:=rgfpu.getregisterfpu(list);
    end;
  { Allocates an MMX register (subregister R_SUBNONE).  Raises internal
    error 2003121214 when no MMX register allocator has been set up. }
  function Tcgx86.getmmxregister(list:TAsmList):Tregister;
    begin
      if rg[R_MMXREGISTER]=nil then
        internalerror(2003121214);
      getmmxregister:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
    end;
  { Allocates an MM register whose subregister kind matches "size":
      OS_F64  -> R_SUBMMD
      OS_F32  -> R_SUBMMS
      OS_M64  -> R_SUBQ
      OS_M128 -> R_SUBMMWHOLE
    Any other size raises internal error 200506041; a missing MM register
    allocator raises internal error 2003121234. }
  function Tcgx86.getmmregister(list:TAsmList;size:Tcgsize):Tregister;
    var
      subreg : tsubregister;
    begin
      if rg[R_MMREGISTER]=nil then
        internalerror(2003121234);

      { pick the subregister first, then allocate in one place }
      case size of
        OS_F64:
          subreg:=R_SUBMMD;
        OS_F32:
          subreg:=R_SUBMMS;
        OS_M64:
          subreg:=R_SUBQ;
        OS_M128:
          subreg:=R_SUBMMWHOLE;
        else
          internalerror(200506041);
      end;

      result:=rg[R_MMREGISTER].getregister(list,subreg);
    end;
  { Reserves the concrete CPU register "r".  FPU registers cannot be
    reserved this way (internal error 2003121210); every other register
    type is handled by the ancestor. }
  procedure Tcgx86.getcpuregister(list:TAsmList;r:Tregister);
    begin
      if getregtype(r)<>R_FPUREGISTER then
        inherited getcpuregister(list,r)
      else
        internalerror(2003121210);
    end;
  { Gives back the concrete CPU register "r": FPU registers are returned to
    rgfpu, everything else is passed on to the ancestor. }
  procedure tcgx86.ungetcpuregister(list:TAsmList;r:Tregister);
    begin
      if getregtype(r)<>R_FPUREGISTER then
        inherited ungetcpuregister(list,r)
      else
        rgfpu.ungetregisterfpu(list,r);
    end;
  { Allocates a set of CPU registers of type "rt".  Requests for FPU
    registers are silently ignored. }
  procedure Tcgx86.alloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);
    begin
      if rt=R_FPUREGISTER then
        exit;
      inherited alloccpuregisters(list,rt,r);
    end;
  { Releases a set of CPU registers of type "rt".  Requests for FPU
    registers are silently ignored, mirroring alloccpuregisters. }
  procedure Tcgx86.dealloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);
    begin
      if rt=R_FPUREGISTER then
        exit;
      inherited dealloccpuregisters(list,rt,r);
    end;
  { Reports whether registers of type "rt" are in use.  Always false for
    FPU registers; other types are answered by the ancestor. }
  function Tcgx86.uses_registers(rt:Tregistertype):boolean;
    begin
      result:=false;
      if rt<>R_FPUREGISTER then
        result:=inherited uses_registers(rt);
    end;
  { Forwards the (instruction, register) pair to the ancestor unless the
    register is an FPU register, in which case nothing is recorded. }
  procedure tcgx86.add_reg_instruction(instr:Tai;r:tregister);
    begin
      if getregtype(r)=R_FPUREGISTER then
        exit;
      inherited add_reg_instruction(instr,r);
    end;
  { Decrements rgfpu.fpuvaroffset.  Raises internal error 200604201 if the
    offset is already zero or negative. }
  procedure tcgx86.dec_fpu_stack;
    begin
      if rgfpu.fpuvaroffset>0 then
        dec(rgfpu.fpuvaroffset)
      else
        internalerror(200604201);
    end;
  { Increments rgfpu.fpuvaroffset.  Raises internal error 2012062901 if the
    offset is already 7 or more. }
  procedure tcgx86.inc_fpu_stack;
    begin
      if rgfpu.fpuvaroffset<7 then
        inc(rgfpu.fpuvaroffset)
      else
        internalerror(2012062901);
    end;
  227. {****************************************************************************
  228. This is private property, keep out! :)
  229. ****************************************************************************}
  { Selects the move opcode and operand-size suffix for loading a value of
    size s1 into a location of size s2.

      s1  source size; OS_NO means "same as s2"
      s2  destination size; OS_NO means "same as s1"
      op  out: A_MOV, A_MOVZX, A_MOVSX or (x86_64 only) A_MOVSXD
      s3  out: operand size suffix, e.g. S_L or S_BW

    Only same-size or widening combinations are accepted; anything else
    raises an internal error. }
  procedure tcgx86.sizes2load(s1,s2 : tcgsize; var op: tasmop; var s3: topsize);
    begin
      { ensure to have always valid sizes }
      if s1=OS_NO then
        s1:=s2;
      if s2=OS_NO then
        s2:=s1;

      { step 1: the operand size suffix, keyed on the destination size }
      case s2 of
        OS_8,OS_S8 :
          begin
            if not(s1 in [OS_8,OS_S8]) then
              internalerror(200109221);
            s3:=S_B;
          end;
        OS_16,OS_S16:
          case s1 of
            OS_8,OS_S8:
              s3:=S_BW;
            OS_16,OS_S16:
              s3:=S_W;
            else
              internalerror(200109222);
          end;
        OS_32,OS_S32:
          case s1 of
            OS_8,OS_S8:
              s3:=S_BL;
            OS_16,OS_S16:
              s3:=S_WL;
            OS_32,OS_S32:
              s3:=S_L;
            else
              internalerror(200109223);
          end;
  {$ifdef x86_64}
        OS_64,OS_S64:
          { unsigned sources get a 32 bit destination suffix, signed
            sources a 64 bit one }
          case s1 of
            OS_8:
              s3:=S_BL;
            OS_S8:
              s3:=S_BQ;
            OS_16:
              s3:=S_WL;
            OS_S16:
              s3:=S_WQ;
            OS_32:
              s3:=S_L;
            OS_S32:
              s3:=S_LQ;
            OS_64,OS_S64:
              s3:=S_Q;
            else
              internalerror(200304302);
          end;
  {$endif x86_64}
        else
          internalerror(200109227);
      end;

      { step 2: the opcode.  Plain sizes are a simple move, unsigned
        sources are zero extended, the rest is sign extended. }
      if not(s3 in [S_B,S_W,S_L,S_Q]) then
        begin
          if s1 in [OS_8,OS_16,OS_32,OS_64] then
            op:=A_MOVZX
          else
  {$ifdef x86_64}
            if s3=S_LQ then
              op:=A_MOVSXD
            else
  {$endif x86_64}
              op:=A_MOVSX;
        end
      else
        op:=A_MOV;
    end;
  { Rewrites "ref" in place so that it can be used directly as an instruction
    operand on the current target.  Helper instructions needed for that are
    appended to "list".

    x86_64:
      - an offset that does not fit in 32 bits, or a pure absolute address
        (no base, index or symbol), is first loaded into a register;
      - with PIC enabled, symbols are addressed RIP-relative (local symbols)
        or loaded via a RIP-relative addr_pic reference (all others);
      - without PIC, Windows and Darwin targets still use RIP-relative
        symbol addressing.
    i386:
      - Darwin/iphonesim: external symbols go through g_indirect_sym_load,
        other symbols under PIC become relative to the GOT register;
      - other targets under PIC: the symbol address is loaded through an
        addr_pic reference based on the GOT register.
    i8086:
      - a stack pointer base is copied into an address register;
      - a segment held in a non-segment register is moved to ES.

    None of the helper instructions emitted here modifies the CPU flags
    (MOV, LEA, PUSH, POP only), so callers may rely on flags surviving.
    NOTE(review): this also assumes g_indirect_sym_load and the i386
    a_load_reg_reg copy preserve flags, as the original comments imply. }
  procedure tcgx86.make_simple_ref(list:TAsmList;var ref: treference);
    var
      hreg : tregister;
      href : treference;
  {$ifndef x86_64}
      add_hreg: boolean;
  {$endif not x86_64}
    begin
      { make_simple_ref() may have already been called earlier, and in that
        case make sure we don't perform the PIC-simplifications twice }
      if (ref.refaddr in [addr_pic,addr_pic_no_got]) then
        exit;

  {$if defined(x86_64)}
      { Only 32bit is allowed }
      { Note that this isn't entirely correct: for RIP-relative targets/memory models,
        it is actually (offset+@symbol-RIP) that should fit into 32 bits. Since two last
        members aren't known until link time, ABIs place very pessimistic limits
        on offset values, e.g. SysV AMD64 allows +/-$1000000 (16 megabytes) }
      if ((ref.offset<low(longint)) or (ref.offset>high(longint))) or
         { absolute address is not a common thing in x64, but nevertheless a possible one }
         ((ref.base=NR_NO) and (ref.index=NR_NO) and (ref.symbol=nil)) then
        begin
          { Load constant value to register }
          hreg:=GetAddressRegister(list);
          list.concat(taicpu.op_const_reg(A_MOV,S_Q,ref.offset,hreg));
          ref.offset:=0;
          {if assigned(ref.symbol) then
            begin
              list.concat(taicpu.op_sym_ofs_reg(A_ADD,S_Q,ref.symbol,0,hreg));
              ref.symbol:=nil;
            end;}
          { Add register to reference }
          if ref.base=NR_NO then
            ref.base:=hreg
          else if ref.index=NR_NO then
            ref.index:=hreg
          else
            begin
              { Base and index are both taken: fold hreg into one of them.
                Don't use add, as the flags may contain a value; LEA
                computes the same sum without touching the flags. }
              if ref.scalefactor<>0 then
                begin
                  { scale factor set: merge the constant into the base }
                  reference_reset_base(href,ref.base,0,8);
                  href.index:=hreg;
                  list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                  ref.base:=hreg;
                end
              else
                begin
                  { no scale factor: merge the constant into the index }
                  reference_reset_base(href,ref.index,0,8);
                  href.index:=hreg;
                  list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                  ref.index:=hreg;
                end;
            end;
        end;

      if assigned(ref.symbol) then
        begin
          if cs_create_pic in current_settings.moduleswitches then
            begin
              { Local symbols must not be accessed via the GOT }
              if (ref.symbol.bind=AB_LOCAL) then
                begin
                  { unfortunately, RIP-based addresses don't support an index }
                  if (ref.base<>NR_NO) or
                     (ref.index<>NR_NO) then
                    begin
                      reference_reset_symbol(href,ref.symbol,0,ref.alignment);
                      hreg:=getaddressregister(list);
                      href.refaddr:=addr_pic_no_got;
                      href.base:=NR_RIP;
                      list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                      ref.symbol:=nil;
                    end
                  else
                    begin
                      ref.refaddr:=addr_pic_no_got;
                      hreg:=NR_NO;
                      ref.base:=NR_RIP;
                    end;
                end
              else
                begin
                  reference_reset_symbol(href,ref.symbol,0,ref.alignment);
                  hreg:=getaddressregister(list);
                  href.refaddr:=addr_pic;
                  href.base:=NR_RIP;
                  list.concat(taicpu.op_ref_reg(A_MOV,S_Q,href,hreg));
                  ref.symbol:=nil;
                end;

              if ref.base=NR_NO then
                ref.base:=hreg
              else if ref.index=NR_NO then
                begin
                  ref.index:=hreg;
                  ref.scalefactor:=1;
                end
              else
                begin
                  { don't use add, as the flags may contain a value }
                  reference_reset_base(href,ref.base,0,8);
                  href.index:=hreg;
                  list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                  ref.base:=hreg;
                end;
            end
          else
            { Always use RIP relative symbol addressing for Windows and Darwin targets. }
            if (target_info.system in (systems_all_windows+[system_x86_64_darwin])) and (ref.base<>NR_RIP) then
              begin
                if (ref.refaddr=addr_no) and (ref.base=NR_NO) and (ref.index=NR_NO) then
                  begin
                    { Set RIP relative addressing for simple symbol references }
                    ref.base:=NR_RIP;
                    ref.refaddr:=addr_pic_no_got
                  end
                else
                  begin
                    { Use temp register to load calculated 64-bit symbol address for complex references }
                    reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
                    href.base:=NR_RIP;
                    href.refaddr:=addr_pic_no_got;
                    hreg:=GetAddressRegister(list);
                    list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                    ref.symbol:=nil;
                    if ref.base=NR_NO then
                      ref.base:=hreg
                    else if ref.index=NR_NO then
                      begin
                        ref.index:=hreg;
                        ref.scalefactor:=0;
                      end
                    else
                      begin
                        { don't use add, as the flags may contain a value }
                        reference_reset_base(href,ref.base,0,8);
                        href.index:=hreg;
                        list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                        ref.base:=hreg;
                      end;
                  end;
              end;
        end;
  {$elseif defined(i386)}
      { add_hreg tells the common tail below that hreg must be merged into ref }
      add_hreg:=false;
      if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
        begin
          if assigned(ref.symbol) and
             not(assigned(ref.relsymbol)) and
             ((ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN]) or
              (cs_create_pic in current_settings.moduleswitches)) then
            begin
              if ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN] then
                begin
                  hreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
                  ref.symbol:=nil;
                end
              else
                begin
                  include(current_procinfo.flags,pi_needs_got);
                  { make a copy of the got register, hreg can get modified }
                  hreg:=cg.getaddressregister(list);
                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,current_procinfo.got,hreg);
                  ref.relsymbol:=current_procinfo.CurrGOTLabel;
                end;
              add_hreg:=true
            end
        end
      else if (cs_create_pic in current_settings.moduleswitches) and
         assigned(ref.symbol) then
        begin
          reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
          href.base:=current_procinfo.got;
          href.refaddr:=addr_pic;
          include(current_procinfo.flags,pi_needs_got);
          hreg:=cg.getaddressregister(list);
          list.concat(taicpu.op_ref_reg(A_MOV,S_L,href,hreg));
          ref.symbol:=nil;
          add_hreg:=true;
        end;

      if add_hreg then
        begin
          if ref.base=NR_NO then
            ref.base:=hreg
          else if ref.index=NR_NO then
            begin
              ref.index:=hreg;
              ref.scalefactor:=1;
            end
          else
            begin
              { don't use add, as the flags may contain a value }
              reference_reset_base(href,ref.base,0,8);
              href.index:=hreg;
              list.concat(taicpu.op_ref_reg(A_LEA,S_L,href,hreg));
              ref.base:=hreg;
            end;
        end;
  {$elseif defined(i8086)}
      { i8086 does not support stack relative addressing }
      if ref.base = NR_STACK_POINTER_REG then
        begin
          href:=ref;
          href.base:=getaddressregister(list);
          { let the register allocator find a suitable register for the reference }
          list.Concat(Taicpu.op_reg_reg(A_MOV, S_W, NR_SP, href.base));
          ref:=href;
        end;

      { if there is a segment in an int register, move it to ES }
      if (ref.segment<>NR_NO) and (not is_segment_reg(ref.segment)) then
        begin
          list.concat(taicpu.op_reg(A_PUSH,S_W,ref.segment));
          list.concat(taicpu.op_reg(A_POP,S_W,NR_ES));
          ref.segment:=NR_ES;
        end;
  {$endif}
    end;
  { Returns in op/s the x87 load opcode and operand size for a value of
    size t: FLD for OS_F32/OS_F64/OS_F80, FILD for OS_C64.  Any other size
    raises internal error 200204043. }
  procedure tcgx86.floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
    begin
      case t of
        OS_F32 :
          begin
            s:=S_FS;
            op:=A_FLD;
          end;
        OS_F64 :
          begin
            s:=S_FL;
            op:=A_FLD;
          end;
        OS_F80 :
          begin
            s:=S_FX;
            op:=A_FLD;
          end;
        OS_C64 :
          begin
            s:=S_IQ;
            op:=A_FILD;
          end;
        else
          internalerror(200204043);
      end;
    end;
  { Emits the x87 load of a value of size t from ref and records the push
    via inc_fpu_stack.  The reference is simplified on a private copy, so
    the caller's ref is left untouched. }
  procedure tcgx86.floatload(list: TAsmList; t : tcgsize;const ref : treference);
    var
      loadop  : tasmop;
      opsize  : topsize;
      src     : treference;
    begin
      src:=ref;
      make_simple_ref(list,src);
      floatloadops(t,loadop,opsize);
      list.concat(Taicpu.Op_ref(loadop,opsize,src));
      inc_fpu_stack;
    end;
  { Returns in op/s the x87 store-and-pop opcode and operand size for a
    value of size t: FSTP for OS_F32/OS_F64/OS_F80, FISTP for OS_C64.  Any
    other size raises internal error 200204042. }
  procedure tcgx86.floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
    begin
      case t of
        OS_F32 :
          begin
            s:=S_FS;
            op:=A_FSTP;
          end;
        OS_F64 :
          begin
            s:=S_FL;
            op:=A_FSTP;
          end;
        OS_F80 :
          begin
            s:=S_FX;
            op:=A_FSTP;
          end;
        OS_C64 :
          begin
            s:=S_IQ;
            op:=A_FISTP;
          end;
        else
          internalerror(200204042);
      end;
    end;
  { Emits the x87 store-and-pop of a value of size t to ref and records the
    pop via dec_fpu_stack.  When the cs_fpu_fwait local switch is set, an
    FWAIT follows every store that is not OS_F80. }
  procedure tcgx86.floatstore(list: TAsmList; t : tcgsize;const ref : treference);
    var
      storeop : tasmop;
      opsize  : topsize;
      dest    : treference;
    begin
      dest:=ref;
      make_simple_ref(list,dest);
      floatstoreops(t,storeop,opsize);
      list.concat(Taicpu.Op_ref(storeop,opsize,dest));
      { storing non extended floats can cause a floating point overflow }
      if (cs_fpu_fwait in current_settings.localswitches) and
         (t<>OS_F80) then
        list.concat(Taicpu.Op_none(A_FWAIT,S_NO));
      dec_fpu_stack;
    end;
  { Sanity check: the operand size implied by "size" must equal the operand
    size implied by the register's own size.  A mismatch raises internal
    error 200306031. }
  procedure tcgx86.check_register_size(size:tcgsize;reg:tregister);
    var
      wanted, actual : topsize;
    begin
      wanted:=TCGSize2OpSize[size];
      actual:=TCGSize2OpSize[reg_cgsize(reg)];
      if wanted<>actual then
        internalerror(200306031);
    end;
  604. {****************************************************************************
  605. Assembler code
  606. ****************************************************************************}
  { Emits an unconditional jump to the symbol named s.  On i386 Darwin the
    jump goes through the symbol's call stub instead of the symbol itself. }
  procedure tcgx86.a_jmp_name(list : TAsmList;const s : string);
    var
      stubref: treference;
    begin
      if (target_info.system = system_i386_darwin) then
        begin
          reference_reset_symbol(stubref,get_darwin_call_stub(s,false),0,sizeof(pint));
          stubref.refaddr:=addr_full;
          list.concat(taicpu.op_ref(A_JMP,S_NO,stubref));
        end
      else
        list.concat(taicpu.op_sym(A_JMP,S_NO,current_asmdata.RefAsmSymbol(s)));
    end;
  { Emits an unconditional jump to label l, expressed as a conditional jump
    with the "no condition" comparison OC_NONE. }
  procedure tcgx86.a_jmp_always(list : TAsmList;l: tasmlabel);
    begin
      a_jmp_cond(list,OC_NONE,l);
    end;
  { Returns the symbol of the call stub "L<s>$stub" for the routine named s,
    creating the stub in the al_imports list on first use.

      s     name of the target symbol
      weak  when true, s is additionally registered as a weak reference

    A new stub consists of the stub symbol, an indirect_symbol directive for
    s and five HLT instructions (presumably placeholder bytes that get
    replaced later; that is not visible here). }
  function tcgx86.get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
    var
      stubname : string;
      stublist : TAsmList;
      filler   : integer;
    begin
      stubname:='L'+s+'$stub';
      result:=current_asmdata.getasmsymbol(stubname);
      if not assigned(result) then
        begin
          { create the imports list on demand }
          if current_asmdata.asmlists[al_imports]=nil then
            current_asmdata.asmlists[al_imports]:=TAsmList.create;
          stublist:=current_asmdata.asmlists[al_imports];

          new_section(stublist,sec_stub,'',0);
          result:=current_asmdata.RefAsmSymbol(stubname);
          stublist.concat(Tai_symbol.Create(result,0));
          { register as a weak symbol if necessary }
          if weak then
            current_asmdata.weakrefasmsymbol(s);
          stublist.concat(tai_directive.create(asd_indirect_symbol,s));
          for filler:=1 to 5 do
            stublist.concat(taicpu.op_none(A_HLT));
        end;
    end;
  { Emits a call to the routine named s.

      s     name of the target symbol
      weak  reference the symbol weakly

    i386 Darwin calls go through the symbol's call stub.  On all other
    targets the symbol is called directly, using an addr_pic reference when
    PIC is enabled (except on x86_64 Darwin and the i386 iPhone simulator)
    and addr_full otherwise. }
  procedure tcgx86.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target  : tasmsymbol;
      callref : treference;
    begin
      if (target_info.system = system_i386_darwin) then
        begin
          reference_reset_symbol(callref,get_darwin_call_stub(s,weak),0,sizeof(pint));
          callref.refaddr:=addr_full;
        end
      else
        begin
          if weak then
            target:=current_asmdata.WeakRefAsmSymbol(s)
          else
            target:=current_asmdata.RefAsmSymbol(s);
          reference_reset_symbol(callref,target,0,sizeof(pint));
          if (cs_create_pic in current_settings.moduleswitches) and
             { darwin's assembler doesn't want @PLT after call symbols }
             not(target_info.system in [system_x86_64_darwin,system_i386_iphonesim]) then
            begin
  {$ifdef i386}
              include(current_procinfo.flags,pi_needs_got);
  {$endif i386}
              callref.refaddr:=addr_pic
            end
          else
            callref.refaddr:=addr_full;
        end;
      list.concat(taicpu.op_ref(A_CALL,S_NO,callref));
    end;
  { Emits a direct call to the symbol named s, always with an addr_full
    reference (no stub and no PIC handling). }
  procedure tcgx86.a_call_name_static(list : TAsmList;const s : string);
    var
      callref : treference;
    begin
      reference_reset_symbol(callref,current_asmdata.RefAsmSymbol(s),0,sizeof(pint));
      callref.refaddr:=addr_full;
      list.concat(taicpu.op_ref(A_CALL,S_NO,callref));
    end;
  { Emits an indirect call through register reg. }
  procedure tcgx86.a_call_reg(list : TAsmList;reg : tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_CALL,S_NO,reg);
      list.concat(callinstr);
    end;
  { Emits an indirect call through the memory reference ref.  The reference
    is used as passed; make_simple_ref is not applied here. }
  procedure tcgx86.a_call_ref(list : TAsmList;ref : treference);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_ref(A_CALL,S_NO,ref);
      list.concat(callinstr);
    end;
  696. {********************** load instructions ********************}
  { Loads the constant a into reg with a MOV of size tosize.  The register
    must match tosize (see check_register_size). }
  procedure tcgx86.a_load_const_reg(list : TAsmList; tosize: TCGSize; a : tcgint; reg : TRegister);
    var
      opsize : topsize;
    begin
      check_register_size(tosize,reg);
      opsize:=TCGSize2OpSize[tosize];
      { the optimizer will change it to "xor reg,reg" when loading zero, }
      { no need to do it here too (JM) }
      list.concat(taicpu.op_const_reg(A_MOV,opsize,a,reg));
    end;
  { Stores the constant a of size tosize to the memory location ref.

    On x86_64 a 64 bit constant that does not fit a signed 32 bit immediate
    is written as two 32 bit stores: low half at the reference, high half
    four bytes further on. }
  procedure tcgx86.a_load_const_ref(list : TAsmList; tosize: tcgsize; a : tcgint;const ref : treference);
    var
      dest : treference;
    begin
      { work on a private copy, the caller's reference stays untouched }
      dest:=ref;
      make_simple_ref(list,dest);
  {$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if (tosize in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          { low dword }
          a_load_const_ref(list,OS_32,longint(a and $ffffffff),dest);
          { high dword }
          inc(dest.offset,4);
          a_load_const_ref(list,OS_32,longint(a shr 32),dest);
          exit;
        end;
  {$endif x86_64}
      list.concat(taicpu.op_const_ref(A_MOV,TCGSize2OpSize[tosize],a,dest));
    end;
  { Stores register reg (of size fromsize) to the memory location ref as a
    value of size tosize.

    When the store requires an extension (sizes2load yields a two-size
    suffix), the value is first extended into a fresh integer register and
    that register is then stored with a recursive call.  Otherwise a single
    register-to-memory instruction is emitted. }
  procedure tcgx86.a_load_reg_ref(list : TAsmList; fromsize,tosize: TCGSize; reg : tregister;const ref : treference);
    var
      movop    : tasmop;
      opsize   : topsize;
      extsize  : tcgsize;
      extreg   : tregister;
      dest     : treference;
    begin
      dest:=ref;
      make_simple_ref(list,dest);
      check_register_size(fromsize,reg);
      sizes2load(fromsize,tosize,movop,opsize);

      if opsize in [
  {$ifdef x86_64}
                    S_BQ,S_WQ,S_LQ,
  {$endif x86_64}
                    S_BW,S_BL,S_WL] then
        begin
          extreg:=getintregister(list,tosize);
          extsize:=tosize;
  {$ifdef x86_64}
          { zero extensions to 64 bit on the x86_64 are simply done by writing to the lower 32 bit
            which clears the upper 32 bits too, so it could be that the suffix is S_L while the reg is
            64 bit (FK) }
          if opsize in [S_BL,S_WL,S_L] then
            begin
              extreg:=makeregsize(list,extreg,OS_32);
              extsize:=OS_32;
            end;
  {$endif x86_64}
          { extend into the temporary, then store the temporary }
          list.concat(taicpu.op_reg_reg(movop,opsize,reg,extreg));
          a_load_reg_ref(list,extsize,tosize,extreg,dest);
        end
      else
        list.concat(taicpu.op_reg_ref(movop,opsize,reg,dest));
    end;
  { Loads a value of size fromsize from the memory location ref into reg,
    converting it to tosize.  The opcode and suffix come from sizes2load;
    reg must match tosize (see check_register_size). }
  procedure tcgx86.a_load_ref_reg(list : TAsmList;fromsize,tosize : tcgsize;const ref: treference;reg : tregister);
    var
      movop  : tasmop;
      opsize : topsize;
      src    : treference;
    begin
      src:=ref;
      make_simple_ref(list,src);
      check_register_size(tosize,reg);
      sizes2load(fromsize,tosize,movop,opsize);
  {$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing to the lower 32 bit
        which clears the upper 32 bits too, so it could be that the suffix is S_L while the reg is
        64 bit (FK) }
      if opsize in [S_BL,S_WL,S_L] then
        reg:=makeregsize(list,reg,OS_32);
  {$endif x86_64}
      list.concat(taicpu.op_ref_reg(movop,opsize,src,reg));
    end;
  { Copies reg1 (size fromsize) into reg2 (size tosize).

    A narrowing copy is a plain MOV of the tosize-sized part of reg1; for
    equal or widening sizes the opcode and suffix come from sizes2load.  No
    instruction is emitted when source and destination end up being the
    same register.  Emitted moves are reported through add_move_instruction
    unless the source is the frame pointer or the stack pointer. }
  procedure tcgx86.a_load_reg_reg(list : TAsmList;fromsize,tosize : tcgsize;reg1,reg2 : tregister);
    var
      movop   : tasmop;
      opsize  : topsize;
      movinst : Taicpu;
    begin
      check_register_size(fromsize,reg1);
      check_register_size(tosize,reg2);

      if tcgsize2size[fromsize]<=tcgsize2size[tosize] then
        sizes2load(fromsize,tosize,movop,opsize)
      else
        begin
          { narrowing: move only the low part of the source }
          reg1:=makeregsize(list,reg1,tosize);
          opsize:=tcgsize2opsize[tosize];
          movop:=A_MOV;
        end;
  {$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing to the lower 32 bit
        which clears the upper 32 bits too, so it could be that the suffix is S_L while the reg is
        64 bit (FK)
      }
      if opsize in [S_BL,S_WL,S_L] then
        reg2:=makeregsize(list,reg2,OS_32);
  {$endif x86_64}
      if (reg1<>reg2) then
        begin
          movinst:=taicpu.op_reg_reg(movop,opsize,reg1,reg2);
          { Notify the register allocator that we have written a move instruction so
            it can try to eliminate it. }
          if (reg1<>NR_STACK_POINTER_REG) and (reg1<>current_procinfo.framepointer) then
            add_move_instruction(movinst);
          list.concat(movinst);
        end;
  {$ifdef x86_64}
      { avoid merging of registers and killing the zero extensions (FK) }
      if (opsize=S_L) and (tosize in [OS_64,OS_S64]) then
        list.concat(taicpu.op_const_reg(A_AND,S_L,$ffffffff,reg2));
  {$endif x86_64}
    end;
  { Emits code that places the address described by ref into register r.

    The routine distinguishes four shapes of reference:
      - no base and no index: either a symbol (handled per target/PIC mode)
        or a plain constant offset;
      - index only, without offset/scale/symbol: a register copy;
      - base only, without offset/symbol: a register copy;
      - everything else: a LEA on a simplified copy of the reference.
    Afterwards a reference carrying a segment register is either converted
    (GS-based thread variables on i386 Linux/Android) or rejected with
    cg_e_cant_use_far_pointer_there. }
  procedure tcgx86.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      tmpref : treference;
    begin
      with ref do
        begin
          if (base=NR_NO) and (index=NR_NO) then
            begin
              if assigned(ref.symbol) then
                begin
                  { i386 Darwin / iPhone simulator: external symbols and all
                    symbols in PIC mode get special treatment }
                  if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
                     ((ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                      (cs_create_pic in current_settings.moduleswitches)) then
                    begin
                      if (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                         ((cs_create_pic in current_settings.moduleswitches) and
                          (ref.symbol.bind in [AB_COMMON,AB_GLOBAL,AB_PRIVATE_EXTERN])) then
                        begin
                          { g_indirect_sym_load yields a register that is used
                            as the base of a new reference; recurse to add the
                            offset to it }
                          reference_reset_base(tmpref,
                            g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol)),
                            offset,sizeof(pint));
                          a_loadaddr_ref_reg(list,tmpref,r);
                        end
                      else
                        begin
                          { address the symbol relative to the procedure's GOT
                            register and the current GOT label }
                          include(current_procinfo.flags,pi_needs_got);
                          reference_reset_base(tmpref,current_procinfo.got,offset,ref.alignment);
                          tmpref.symbol:=symbol;
                          tmpref.relsymbol:=current_procinfo.CurrGOTLabel;
                          list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],tmpref,r));
                        end;
                    end
                  { generic PIC case; on x86_64 local symbols are excluded
                    here and fall through to the RIP-relative LEA below }
                  else if (cs_create_pic in current_settings.moduleswitches)
{$ifdef x86_64}
                          and not(ref.symbol.bind=AB_LOCAL)
{$endif x86_64}
                          then
                    begin
{$ifdef x86_64}
                      { MOV from an addr_pic reference based on RIP; the
                        offset is deliberately left out and added afterwards }
                      reference_reset_symbol(tmpref,ref.symbol,0,ref.alignment);
                      tmpref.refaddr:=addr_pic;
                      tmpref.base:=NR_RIP;
                      list.concat(taicpu.op_ref_reg(A_MOV,S_Q,tmpref,r));
{$else x86_64}
                      { MOV from an addr_pic reference based on the GOT
                        register; the offset is added afterwards }
                      reference_reset_symbol(tmpref,ref.symbol,0,ref.alignment);
                      tmpref.refaddr:=addr_pic;
                      tmpref.base:=current_procinfo.got;
                      include(current_procinfo.flags,pi_needs_got);
                      list.concat(taicpu.op_ref_reg(A_MOV,S_L,tmpref,r));
{$endif x86_64}
                      if offset<>0 then
                        a_op_const_reg(list,OP_ADD,OS_ADDR,offset,r);
                    end
{$ifdef x86_64}
                  else if (target_info.system in (systems_all_windows+[system_x86_64_darwin]))
                          or (cs_create_pic in current_settings.moduleswitches)
                          then
                    begin
                      { Win64 and Darwin/x86_64 always require RIP-relative addressing }
                      tmpref:=ref;
                      tmpref.base:=NR_RIP;
                      tmpref.refaddr:=addr_pic_no_got;
                      list.concat(Taicpu.op_ref_reg(A_LEA,S_Q,tmpref,r));
                    end
{$endif x86_64}
                  else
                    begin
                      { non-PIC: MOV with the reference marked ADDR_FULL }
                      tmpref:=ref;
                      tmpref.refaddr:=ADDR_FULL;
                      list.concat(Taicpu.op_ref_reg(A_MOV,tcgsize2opsize[OS_ADDR],tmpref,r));
                    end
                end
              else
                { neither registers nor symbol: the address is just the offset }
                a_load_const_reg(list,OS_ADDR,offset,r)
            end
          else if (base=NR_NO) and (index<>NR_NO) and
                  (offset=0) and (scalefactor=0) and (symbol=nil) then
            { bare index register: a register move is enough }
            a_load_reg_reg(list,OS_ADDR,OS_ADDR,index,r)
          else if (base<>NR_NO) and (index=NR_NO) and
                  (offset=0) and (symbol=nil) then
            { bare base register: a register move is enough }
            a_load_reg_reg(list,OS_ADDR,OS_ADDR,base,r)
          else
            begin
              { general case: simplify a copy of the reference and LEA it }
              tmpref:=ref;
              make_simple_ref(list,tmpref);
              list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],tmpref,r));
            end;
          if segment<>NR_NO then
            begin
              if (tf_section_threadvars in target_info.flags) then
                begin
                  { Convert thread local address to a process global address
                    as we cannot handle far pointers.}
                  case target_info.system of
                    system_i386_linux,system_i386_android:
                      if segment=NR_GS then
                        begin
                          { add the value stored at GS:___fpc_threadvar_offset to r }
                          reference_reset_symbol(tmpref,current_asmdata.RefAsmSymbol('___fpc_threadvar_offset'),0,ref.alignment);
                          tmpref.segment:=NR_GS;
                          list.concat(Taicpu.op_ref_reg(A_ADD,tcgsize2opsize[OS_ADDR],tmpref,r));
                        end
                      else
                        cgmessage(cg_e_cant_use_far_pointer_there);
                    else
                      cgmessage(cg_e_cant_use_far_pointer_there);
                  end;
                end
              else
                cgmessage(cg_e_cant_use_far_pointer_there);
            end;
        end;
    end;
  { All fpu load routines expect that R_ST[0-7] means an fpu regvar and
    R_ST means "the current value at the top of the fpu stack" (JM).

    Moves an x87 value from reg1 to reg2. A source other than NR_ST is first
    loaded onto the fpu stack, a destination other than NR_ST is then stored
    from it. When both are NR_ST and the value has to be narrowed, it is
    written to a temporary in memory and read back. }
  procedure tcgx86.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
    var
      roundtemp: treference;
      fpuop: tasmop;
      fpuopsize: topsize;
      src_is_top, dst_is_top: boolean;
    begin
      src_is_top:=(reg1=NR_ST);
      dst_is_top:=(reg2=NR_ST);

      { source is an fpu regvar: load it onto the stack }
      if not src_is_top then
        begin
          floatloadops(tosize,fpuop,fpuopsize);
          list.concat(taicpu.op_reg(fpuop,fpuopsize,rgfpu.correct_fpuregister(reg1,rgfpu.fpuvaroffset)));
          inc_fpu_stack;
        end;

      { destination is an fpu regvar: store the stack top into it }
      if not dst_is_top then
        begin
          floatstoreops(tosize,fpuop,fpuopsize);
          list.concat(taicpu.op_reg(fpuop,fpuopsize,rgfpu.correct_fpuregister(reg2,rgfpu.fpuvaroffset)));
          dec_fpu_stack;
        end;

      { OS_F80 < OS_C64, but OS_C64 fits perfectly in OS_F80 }
      if src_is_top and dst_is_top then
        if (tosize<>OS_F80) and (tosize<fromsize) then
          begin
            { can't round down to lower precision in x87 :/
              so go through a memory temporary of the target size }
            tg.gettemp(list,tcgsize2size[tosize],tcgsize2size[tosize],tt_normal,roundtemp);
            a_loadfpu_reg_ref(list,fromsize,tosize,NR_ST,roundtemp);
            a_loadfpu_ref_reg(list,tosize,tosize,roundtemp,NR_ST);
            tg.ungettemp(list,roundtemp);
          end;
    end;
  { Loads a floating point value of size fromsize from memory (ref) and moves
    it to reg: floatload emits the load, then a_loadfpu_reg_reg is called with
    NR_ST as source to transfer/convert the value to reg. }
  procedure tcgx86.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
    begin
      floatload(list,fromsize,ref);
      a_loadfpu_reg_reg(list,fromsize,tosize,NR_ST,reg);
    end;
  { Stores the fpu value held in reg to memory at ref using size tosize.
    A source other than NR_ST is first moved to NR_ST via a_loadfpu_reg_reg;
    the actual store is emitted by floatstore. }
  procedure tcgx86.a_loadfpu_reg_ref(list: TAsmList; fromsize,tosize: tcgsize; reg: tregister; const ref: treference);
    begin
      { In case a record returned in a floating point register
        (LOC_FPUREGISTER with OS_F32/OS_F64) is stored in memory
        (LOC_REFERENCE with OS_32/OS_64), the integer tosize is replaced
        by the float size of the same width. }
      if (fromsize in [OS_F32,OS_F64]) and
         (tcgsize2size[fromsize]=tcgsize2size[tosize]) then
        begin
          if tosize=OS_32 then
            tosize:=OS_F32
          else if tosize=OS_64 then
            tosize:=OS_F64;
        end;

      if not(reg=NR_ST) then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,NR_ST);

      floatstore(list,tosize,ref);
    end;
  { Returns the opcode used to move/convert a scalar value of size fromsize
    to size tosize in an mm register. Raises an internal error when the size
    pair is not covered (2010060104) or the table entry is A_NONE
    (200312205). }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      { indexed [fromsize,tosize] }
      convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_MOVSS,A_CVTSS2SD,A_NONE,A_NONE,A_NONE),
        (A_CVTSD2SS,A_MOVSD,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      { we can have OS_F32/OS_F64 (record in function result/LOC_MMREGISTER) to
        OS_32/OS_64 (record in memory/LOC_REFERENCE): treat the integer
        target size as the float size of the same width }
      if (fromsize in [OS_F32,OS_F64]) and
         (tcgsize2size[fromsize]=tcgsize2size[tosize]) then
        begin
          if tosize=OS_32 then
            tosize:=OS_F32
          else if tosize=OS_64 then
            tosize:=OS_F64;
        end;

      if not((fromsize in [low(convertop)..high(convertop)]) and
             (tosize in [low(convertop)..high(convertop)])) then
        begin
          { we can have OS_M64 (record in function result/LOC_MMREGISTER) to
            OS_64 (record in memory/LOC_REFERENCE) }
          if (fromsize=OS_M64) and
             (tcgsize2size[fromsize]=tcgsize2size[tosize]) then
            result:=A_MOVQ
          else
            internalerror(2010060104);
        end
      else
        result:=convertop[fromsize,tosize];

      if result=A_NONE then
        internalerror(200312205);
    end;
  { Copies an mm register to another one.
    Without a shuffle both sizes must be equal and a full-register move is
    emitted; with a scalar shuffle the opcode comes from get_scalar_mm_op.
    Plain moves are reported through add_move_instruction. Any other shuffle
    is an internal error. }
  procedure tcgx86.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle);
    var
      movinstr : taicpu;
      scalarop : tasmop;
    begin
      if shuffle=nil then
        begin
          if fromsize<>tosize then
            internalerror(200312202);
          { needs correct size in case of spilling }
          case fromsize of
            OS_F32:
              movinstr:=taicpu.op_reg_reg(A_MOVAPS,S_NO,reg1,reg2);
            OS_F64:
              movinstr:=taicpu.op_reg_reg(A_MOVAPD,S_NO,reg1,reg2);
            OS_M64:
              movinstr:=taicpu.op_reg_reg(A_MOVQ,S_NO,reg1,reg2);
            else
              internalerror(2006091201);
          end;
          add_move_instruction(movinstr);
        end
      else if shufflescalar(shuffle) then
        begin
          scalarop:=get_scalar_mm_op(fromsize,tosize);
          movinstr:=taicpu.op_reg_reg(scalarop,S_NO,reg1,reg2);
          { only the pure move opcodes are registered as moves,
            the conversion opcodes are not }
          if (scalarop=A_MOVSS) or (scalarop=A_MOVSD) or (scalarop=A_MOVQ) then
            add_move_instruction(movinstr);
        end
      else
        internalerror(200312201);
      list.concat(movinstr);
    end;
  { Loads an mm register from memory.
    Without a shuffle, OS_M64 uses MOVQ and everything else a full 128 bit
    move (MOVDQA on x86_64, MOVDQU otherwise); with a scalar shuffle the
    opcode comes from get_scalar_mm_op. Other shuffles raise an internal
    error. }
  procedure tcgx86.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
    var
      srcref : treference;
      loadop : tasmop;
    begin
      srcref:=ref;
      make_simple_ref(list,srcref);
      if shuffle=nil then
        begin
          if fromsize=OS_M64 then
            loadop:=A_MOVQ
          else
{$ifdef x86_64}
            { x86-64 has always properly aligned data }
            loadop:=A_MOVDQA;
{$else x86_64}
            loadop:=A_MOVDQU;
{$endif x86_64}
        end
      else if shufflescalar(shuffle) then
        loadop:=get_scalar_mm_op(fromsize,tosize)
      else
        internalerror(200312252);
      list.concat(taicpu.op_ref_reg(loadop,S_NO,srcref,reg));
    end;
  { Stores an mm register to memory.
    Without a shuffle, OS_M64 uses MOVQ and everything else a full 128 bit
    move (MOVDQA on x86_64, MOVDQU otherwise). With a scalar shuffle the
    value is first converted into a fresh mm register when source and
    target sizes differ, then stored. Other shuffles raise an internal
    error. }
  procedure tcgx86.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle);
    var
      convreg : tregister;
      srcreg : tregister;
      dstref : treference;
      storeop : tasmop;
    begin
      dstref:=ref;
      make_simple_ref(list,dstref);
      srcreg:=reg;
      if shuffle=nil then
        begin
          if fromsize=OS_M64 then
            storeop:=A_MOVQ
          else
{$ifdef x86_64}
            { x86-64 has always properly aligned data }
            storeop:=A_MOVDQA;
{$else x86_64}
            storeop:=A_MOVDQU;
{$endif x86_64}
        end
      else if shufflescalar(shuffle) then
        begin
          if tcgsize2size[tosize]=tcgsize2size[fromsize] then
            storeop:=get_scalar_mm_op(fromsize,tosize)
          else
            begin
              { convert into a temporary register of the target size first }
              convreg:=getmmregister(list,tosize);
              list.concat(taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg,convreg));
              storeop:=get_scalar_mm_op(tosize,tosize);
              srcreg:=convreg;
            end;
        end
      else
        internalerror(200312252);
      list.concat(taicpu.op_reg_ref(storeop,S_NO,srcreg,dstref));
    end;
  { mm operation with a memory source operand: wraps ref in a LOC_REFERENCE
    location and forwards to opmm_loc_reg. }
  procedure tcgx86.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
    var
      srcloc : tlocation;
    begin
      srcloc.size:=size;
      srcloc.loc:=LOC_REFERENCE;
      srcloc.reference:=ref;
      opmm_loc_reg(list,op,size,srcloc,reg,shuffle);
    end;
  { mm operation with a register source operand: wraps src in a
    LOC_MMREGISTER location and forwards to opmm_loc_reg. }
  procedure tcgx86.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);
    var
      srcloc : tlocation;
    begin
      srcloc.size:=size;
      srcloc.loc:=LOC_MMREGISTER;
      srcloc.register:=src;
      opmm_loc_reg(list,op,size,srcloc,dst,shuffle);
    end;
  { Emits the mm (SSE) instruction implementing Op with loc as source and dst
    as destination.

    list    : assembler list receiving all generated instructions
    Op      : generic operation; it must have an entry other than A_NOP in
              opmm2asmop for the selected mode and size
    size    : operand size, used to index opmm2asmop (OS_F32..OS_F64)
    loc     : source operand, a (C)REFERENCE or (C)MMREGISTER whose size
              equals size
    dst     : destination mm register
    shuffle : nil selects the packed opcode, a scalar shuffle selects the
              scalar opcode; other shuffles are not implemented

    Unsupported combinations raise an internal error. }
  procedure tcgx86.opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
    const
      opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
        ( { scalar }
          ( { OS_F32 }
            A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP,A_NOP,A_NOP
          )
        ),
        ( { vectorized/packed }
          { because the logical packed single instructions have shorter op codes, we use always
            these
          }
          ( { OS_F32 }
            A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD,A_NOP,A_NOP
          )
        )
      );
    var
      resultreg : tregister;
      asmop : tasmop;
    begin
      { this is an internally used procedure so the parameters have
        some constraints
      }
      if loc.size<>size then
        internalerror(200312213);
      resultreg:=dst;
      { deshuffle }
      //!!!
      if (shuffle<>nil) and not(shufflescalar(shuffle)) then
        begin
          { general shuffles are not implemented }
          internalerror(2010060101);
        end
      else if (shuffle=nil) then
        asmop:=opmm2asmop[1,size,op]
      else if shufflescalar(shuffle) then
        begin
          asmop:=opmm2asmop[0,size,op];
          { no scalar operation available? }
          if asmop=A_NOP then
            begin
              { do vectorized and shuffle finally }
              internalerror(2010060102);
            end;
        end
      else
        internalerror(200312211);
      if asmop=A_NOP then
        internalerror(200312216);
      case loc.loc of
        LOC_CREFERENCE,LOC_REFERENCE:
          begin
            { simplify the reference into the same list that receives the
              operation itself, so that any helper instructions precede it;
              loc is a by-value parameter and may be modified }
            make_simple_ref(list,loc.reference);
            list.concat(taicpu.op_ref_reg(asmop,S_NO,loc.reference,resultreg));
          end;
        LOC_CMMREGISTER,LOC_MMREGISTER:
          list.concat(taicpu.op_reg_reg(asmop,S_NO,loc.register,resultreg));
        else
          internalerror(200312214);
      end;
      { shuffle }
      if resultreg<>dst then
        begin
          internalerror(200312212);
        end;
    end;
  { Emits code for "reg := reg Op a" with a constant operand a.

    optimize_op_const may first rewrite op/a (e.g. into OP_NONE or OP_MOVE).
    Several cheap special cases are recognised (power-of-two multiply/divide
    become shifts, add/sub of 1 become INC/DEC, trivial AND/OR/XOR masks).
    Operations that cannot be expressed with an immediate operand here
    (general DIV/IDIV, OP_MUL, unknown ops) raise an internal error. }
  procedure tcgx86.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    var
      opcode : tasmop;
      power  : longint;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
    begin
      optimize_op_const(op, a);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if not(op in [OP_NONE,OP_MOVE]) and
         (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          { load the constant into a scratch register and use the
            register/register form instead }
          tmpreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,tmpreg);
          a_op_reg_reg(list,op,size,tmpreg,reg);
          exit;
        end;
{$endif x86_64}
      check_register_size(size,reg);
      case op of
        OP_NONE :
          begin
            { Opcode is optimized away }
          end;
        OP_MOVE :
          begin
            { Optimized, replaced with a simple load }
            a_load_const_reg(list,size,a,reg);
          end;
        OP_DIV, OP_IDIV:
          begin
            { division by a power of two becomes a right shift:
              SHR for OP_DIV, SAR for OP_IDIV }
            if ispowerof2(int64(a),power) then
              begin
                case op of
                  OP_DIV:
                    opcode := A_SHR;
                  OP_IDIV:
                    opcode := A_SAR;
                end;
                list.concat(taicpu.op_const_reg(opcode,TCgSize2OpSize[size],power,reg));
                exit;
              end;
            { the rest should be handled specifically in the code      }
            { generator because of the silly register usage restraints }
            internalerror(200109224);
          end;
        OP_MUL,OP_IMUL:
          begin
            { multiplication by a power of two becomes SHL, but only when
              overflow checking is off }
            if not(cs_check_overflow in current_settings.localswitches) and
               ispowerof2(int64(a),power) then
              begin
                list.concat(taicpu.op_const_reg(A_SHL,TCgSize2OpSize[size],power,reg));
                exit;
              end;
            if op = OP_IMUL then
              list.concat(taicpu.op_const_reg(A_IMUL,TCgSize2OpSize[size],a,reg))
            else
              { OP_MUL should be handled specifically in the code        }
              { generator because of the silly register usage restraints }
              internalerror(200109225);
          end;
        OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
          { add/sub 1 -> INC/DEC (only without overflow checking) }
          if not(cs_check_overflow in current_settings.localswitches) and
             (a = 1) and
             (op in [OP_ADD,OP_SUB]) then
            if op = OP_ADD then
              list.concat(taicpu.op_reg(A_INC,TCgSize2OpSize[size],reg))
            else
              list.concat(taicpu.op_reg(A_DEC,TCgSize2OpSize[size],reg))
          { a = 0: every op except AND is a no-op, AND clears the register }
          else if (a = 0) then
            if (op <> OP_AND) then
              exit
            else
              list.concat(taicpu.op_const_reg(A_MOV,TCgSize2OpSize[size],0,reg))
          { all bits set: AND is a no-op, OR loads all ones, XOR is NOT }
          else if (aword(a) = high(aword)) and
                  (op in [OP_AND,OP_OR,OP_XOR]) then
            begin
              case op of
                OP_AND:
                  exit;
                OP_OR:
                  list.concat(taicpu.op_const_reg(A_MOV,TCgSize2OpSize[size],aint(high(aword)),reg));
                OP_XOR:
                  list.concat(taicpu.op_reg(A_NOT,TCgSize2OpSize[size],reg));
              end
            end
          else
            list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],aint(a),reg));
        OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
          begin
            { a zero count emits nothing; a count beyond the per-target
              limit (63 on x86_64, 31 otherwise) is an internal error }
{$if defined(x86_64)}
            if (a and 63) <> 0 Then
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 63,reg));
            if (a shr 6) <> 0 Then
              internalerror(200609073);
{$elseif defined(i386)}
            if (a and 31) <> 0 Then
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 31,reg));
            if (a shr 5) <> 0 Then
              internalerror(200609071);
{$elseif defined(i8086)}
            if (a shr 5) <> 0 Then
              internalerror(2013043002);
            a := a and 31;
            if a <> 0 Then
              begin
                { for cpu types below cpu_186 a count other than 1 is
                  passed in CL instead of as an immediate }
                if (current_settings.cputype < cpu_186) and (a <> 1) then
                  begin
                    getcpuregister(list,NR_CL);
                    a_load_const_reg(list,OS_8,a,NR_CL);
                    list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],NR_CL,reg));
                    ungetcpuregister(list,NR_CL);
                  end
                else
                  list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a,reg));
              end;
{$endif}
          end
        else internalerror(200609072);
      end;
    end;
  { Emits code for "mem[ref] := mem[ref] Op a" with a constant operand a.

    optimize_op_const may first rewrite op/a (e.g. into OP_NONE or OP_MOVE).
    All instructions operate on tmpref, a simplified copy of ref. Cheap
    special cases are recognised (power-of-two multiply/divide become shifts,
    add/sub of 1 become INC/DEC, trivial AND/OR/XOR masks). OP_IMUL is
    delegated to the inherited implementation; general DIV/IDIV, OP_MUL and
    unknown ops raise an internal error.

    Shift/rotate counts are limited to 0..63 on x86_64 and 0..31 elsewhere,
    matching a_op_const_reg. }
  procedure tcgx86.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      opcode: tasmop;
      power: longint;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
      tmpref  : treference;
    begin
      optimize_op_const(op, a);
      tmpref:=ref;
      make_simple_ref(list,tmpref);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if not(op in [OP_NONE,OP_MOVE]) and
         (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          { load the constant into a scratch register and use the
            register/memory form instead }
          tmpreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,tmpreg);
          a_op_reg_ref(list,op,size,tmpreg,tmpref);
          exit;
        end;
{$endif x86_64}
      Case Op of
        OP_NONE :
          begin
            { Opcode is optimized away }
          end;
        OP_MOVE :
          begin
            { Optimized, replaced with a simple load; use the already
              simplified reference like every other branch does }
            a_load_const_ref(list,size,a,tmpref);
          end;
        OP_DIV, OP_IDIV:
          Begin
            { division by a power of two becomes a right shift:
              SHR for OP_DIV, SAR for OP_IDIV }
            if ispowerof2(int64(a),power) then
              begin
                case op of
                  OP_DIV:
                    opcode := A_SHR;
                  OP_IDIV:
                    opcode := A_SAR;
                end;
                list.concat(taicpu.op_const_ref(opcode,
                  TCgSize2OpSize[size],power,tmpref));
                exit;
              end;
            { the rest should be handled specifically in the code      }
            { generator because of the silly register usage restraints }
            internalerror(200109231);
          End;
        OP_MUL,OP_IMUL:
          begin
            { multiplication by a power of two becomes SHL, but only when
              overflow checking is off }
            if not(cs_check_overflow in current_settings.localswitches) and
               ispowerof2(int64(a),power) then
              begin
                list.concat(taicpu.op_const_ref(A_SHL,TCgSize2OpSize[size],
                  power,tmpref));
                exit;
              end;
            { can't multiply a memory location directly with a constant }
            if op = OP_IMUL then
              inherited a_op_const_ref(list,op,size,a,tmpref)
            else
              { OP_MUL should be handled specifically in the code        }
              { generator because of the silly register usage restraints }
              internalerror(200109232);
          end;
        OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
          { add/sub 1 -> INC/DEC (only without overflow checking) }
          if not(cs_check_overflow in current_settings.localswitches) and
             (a = 1) and
             (op in [OP_ADD,OP_SUB]) then
            if op = OP_ADD then
              list.concat(taicpu.op_ref(A_INC,TCgSize2OpSize[size],tmpref))
            else
              list.concat(taicpu.op_ref(A_DEC,TCgSize2OpSize[size],tmpref))
          { a = 0: every op except AND is a no-op, AND stores zero }
          else if (a = 0) then
            if (op <> OP_AND) then
              exit
            else
              a_load_const_ref(list,size,0,tmpref)
          { all bits set: AND is a no-op, OR stores all ones, XOR is NOT }
          else if (aword(a) = high(aword)) and
                  (op in [OP_AND,OP_OR,OP_XOR]) then
            begin
              case op of
                OP_AND:
                  exit;
                OP_OR:
                  list.concat(taicpu.op_const_ref(A_MOV,TCgSize2OpSize[size],aint(high(aword)),tmpref));
                OP_XOR:
                  list.concat(taicpu.op_ref(A_NOT,TCgSize2OpSize[size],tmpref));
              end
            end
          else
            list.concat(taicpu.op_const_ref(TOpCG2AsmOp[op],
              TCgSize2OpSize[size],a,tmpref));
        OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
          begin
            { a zero count emits nothing; a count beyond the per-target
              limit is an internal error }
{$ifdef x86_64}
            { 64 bit operands accept counts up to 63 }
            if (a and 63) <> 0 then
              list.concat(taicpu.op_const_ref(
                TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 63,tmpref));
            if (a shr 6) <> 0 Then
              internalerror(68991);
{$else x86_64}
            if (a and 31) <> 0 then
              list.concat(taicpu.op_const_ref(
                TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 31,tmpref));
            if (a shr 5) <> 0 Then
              internalerror(68991);
{$endif x86_64}
          end
        else internalerror(68992);
      end;
    end;
  { Emits code for "dst := dst Op src" on registers (for the unary
    OP_NEG/OP_NOT: "dst := Op src"). Both registers are checked against
    size. Shifts and rotates take their count in CL. OP_MUL, OP_DIV and
    OP_IDIV are not handled here and raise an internal error. }
  procedure tcgx86.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    const
{$if defined(cpu64bitalu) or defined(cpu32bitalu)}
      REGCX=NR_ECX;
      REGCX_Size = OS_32;
{$elseif defined(cpu16bitalu)}
      REGCX=NR_CX;
      REGCX_Size = OS_16;
{$endif}
    var
      opsize: topsize;
    begin
      check_register_size(size,src);
      check_register_size(size,dst);
      opsize:=tcgsize2opsize[size];
      case op of
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code }
          { generator }
          internalerror(200109233);
        OP_NEG,OP_NOT:
          begin
            { unary operation: copy the operand first unless it is in place }
            if not(src=dst) then
              a_load_reg_reg(list,size,size,src,dst);
            list.concat(taicpu.op_reg(TOpCG2AsmOp[op],opsize,dst));
          end;
        OP_SHR,OP_SHL,OP_SAR,OP_ROL,OP_ROR:
          begin
            { Use ecx to load the value, that allows better coalescing }
            getcpuregister(list,REGCX);
            a_load_reg_reg(list,size,REGCX_Size,src,REGCX);
            list.concat(taicpu.op_reg_reg(Topcg2asmop[op],opsize,NR_CL,dst));
            ungetcpuregister(list,REGCX);
          end;
        else
          begin
            { plain two operand instruction; the source register must have
              the operation's size }
            if reg2opsize(src)<>opsize then
              internalerror(200109226);
            list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],opsize,src,dst));
          end;
      end;
    end;
  { Emits code for "reg := reg Op mem[ref]".
    OP_NEG, OP_NOT and OP_IMUL are delegated to the inherited
    implementation; OP_MUL, OP_DIV and OP_IDIV raise an internal error;
    all other operations are emitted as one instruction with a memory
    source operand. }
  procedure tcgx86.a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister);
    var
      srcref : treference;
    begin
      srcref:=ref;
      make_simple_ref(list,srcref);
      check_register_size(size,reg);
      if op in [OP_NEG,OP_NOT,OP_IMUL] then
        inherited a_op_ref_reg(list,op,size,srcref,reg)
      else if op in [OP_MUL,OP_DIV,OP_IDIV] then
        { special stuff, needs separate handling inside code }
        { generator }
        internalerror(200109239)
      else
        begin
          reg:=makeregsize(list,reg,size);
          list.concat(taicpu.op_ref_reg(TOpCG2AsmOp[op],tcgsize2opsize[size],srcref,reg));
        end;
    end;
  { Emits code for "mem[ref] := mem[ref] Op reg".

    OP_NEG/OP_NOT operate on the memory location alone and require
    reg=NR_NO. OP_IMUL has no memory-destination form and is delegated to
    the inherited a_op_reg_ref (load/imul/store). OP_MUL, OP_DIV and OP_IDIV
    raise an internal error. All other operations are emitted as one
    instruction with a memory destination operand. }
  procedure tcgx86.a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference);
    var
      tmpref  : treference;
    begin
      tmpref:=ref;
      make_simple_ref(list,tmpref);
      check_register_size(size,reg);
      case op of
        OP_NEG,OP_NOT:
          begin
            { unary operation directly on memory: no register operand allowed }
            if reg<>NR_NO then
              internalerror(200109237);
            list.concat(taicpu.op_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],tmpref));
          end;
        OP_IMUL:
          begin
            { this one needs a load/imul/store, which is the default;
              the destination is the memory location, so the inherited
              reg->ref variant must be used (not a_op_ref_reg, which would
              leave the result in reg) }
            inherited a_op_reg_ref(list,op,size,reg,tmpref);
          end;
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code }
          { generator }
          internalerror(200109238);
        else
          begin
            list.concat(taicpu.op_reg_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],reg,tmpref));
          end;
      end;
    end;
  { Emits a bit scan of src into dst: BSF when reverse is false, BSR
    otherwise. A conditional jump on OC_NE skips a following "MOV $ff,dst",
    so dst is set to $ff whenever that jump is not taken. }
  procedure tcgx86.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
    var
      scansize: topsize;
      scanop: tasmop;
      donelabel : TAsmLabel;
    begin
      scansize:=tcgsize2opsize[size];
      if reverse then
        scanop:=A_BSR
      else
        scanop:=A_BSF;
      list.concat(taicpu.op_reg_reg(scanop,scansize,src,dst));
      current_asmdata.getjumplabel(donelabel);
      a_jmp_cond(list,OC_NE,donelabel);
      list.concat(taicpu.op_const_reg(A_MOV,scansize,$ff,dst));
      a_label(list,donelabel);
    end;
  {*************** compare instructions ****************}
  { Compares reg with the constant a and jumps to l when cmp_op holds.
    A comparison against zero is emitted as "TEST reg,reg", anything else
    as CMP with an immediate. }
  procedure tcgx86.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly, so larger
        64 bit constants are compared through a scratch register }
      if ((size=OS_S64) or (size=OS_64)) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_cmp_reg_reg_label(list,size,cmp_op,constreg,reg,l);
          exit;
        end;
{$endif x86_64}
      if a<>0 then
        list.concat(taicpu.op_const_reg(A_CMP,tcgsize2opsize[size],a,reg))
      else
        list.concat(taicpu.op_reg_reg(A_TEST,tcgsize2opsize[size],reg,reg));
      a_jmp_cond(list,cmp_op,l);
    end;
  { Compares the memory operand ref with the constant a and jumps to l when
    cmp_op holds. The comparison works on a simplified copy of ref. }
  procedure tcgx86.a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;const ref : treference;
    l : tasmlabel);
    var
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
      memref : treference;
    begin
      memref:=ref;
      make_simple_ref(list,memref);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly, so larger
        64 bit constants are compared through a scratch register }
      if ((size=OS_S64) or (size=OS_64)) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_cmp_reg_ref_label(list,size,cmp_op,constreg,memref,l);
          exit;
        end;
{$endif x86_64}
      list.concat(taicpu.op_const_ref(A_CMP,TCgSize2OpSize[size],a,memref));
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits "CMP reg1,reg2" followed by a conditional jump to l on cmp_op.
    Both registers are checked against size first. }
  procedure tcgx86.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;
    reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      check_register_size(size,reg1);
      check_register_size(size,reg2);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,TCgSize2OpSize[size],reg1,reg2);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits "CMP mem[ref],reg" followed by a conditional jump to l on cmp_op.
    The memory operand is a simplified copy of ref. }
  procedure tcgx86.a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister;l : tasmlabel);
    var
      memref : treference;
      cmpinstr : taicpu;
    begin
      memref:=ref;
      make_simple_ref(list,memref);
      check_register_size(size,reg);
      cmpinstr:=taicpu.op_ref_reg(A_CMP,TCgSize2OpSize[size],memref,reg);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits "CMP reg,mem[ref]" followed by a conditional jump to l on cmp_op.
    The memory operand is a simplified copy of ref. }
  procedure tcgx86.a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister;const ref: treference; l : tasmlabel);
    var
      memref : treference;
      cmpinstr : taicpu;
    begin
      memref:=ref;
      make_simple_ref(list,memref);
      check_register_size(size,reg);
      cmpinstr:=taicpu.op_reg_ref(A_CMP,TCgSize2OpSize[size],reg,memref);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits a jump to l: an unconditional JMP for OC_None, otherwise a Jcc
    whose condition is looked up in TOpCmp2AsmCond. The instruction is
    flagged with is_jmp. }
  procedure tcgx86.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      jmpinstr : taicpu;
    begin
      if cond<>OC_None then
        begin
          jmpinstr:=Taicpu.Op_sym(A_Jcc,S_NO,l);
          jmpinstr.SetCondition(TOpCmp2AsmCond[cond]);
        end
      else
        jmpinstr:=Taicpu.Op_sym(A_JMP,S_NO,l);
      jmpinstr.is_jmp:=true;
      list.concat(jmpinstr);
    end;
  { Emits a Jcc to l whose condition is derived from the result flags f via
    flags_to_cond. The instruction is flagged with is_jmp. }
  procedure tcgx86.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jmpinstr : taicpu;
    begin
      jmpinstr:=Taicpu.op_sym(A_Jcc,S_NO,l);
      jmpinstr.is_jmp:=true;
      jmpinstr.SetCondition(flags_to_cond(f));
      list.concat(jmpinstr);
    end;
  { Materialises the result flags f in reg: a SETcc writes the 8 bit
    subregister obtained through makeregsize, and if that differs from reg
    the byte is then loaded into reg with size "size". }
  procedure tcgx86.g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);
    var
      setinstr : taicpu;
      bytereg : tregister;
    begin
      bytereg:=makeregsize(list,reg,OS_8);
      setinstr:=Taicpu.op_reg(A_SETcc,S_B,bytereg);
      setinstr.setcondition(flags_to_cond(f));
      list.concat(setinstr);
      if not(reg=bytereg) then
        a_load_reg_reg(list,OS_8,size,bytereg,reg);
    end;
  { Materialises the result flags f in memory at ref.
    For sizes other than OS_8/OS_S8 the location is first filled with zero,
    then a SETcc writes its lowest byte. On targets without a 64 bit ALU a
    64 bit destination additionally gets a 32 bit zero stored at offset+4. }
  procedure tcgx86.g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);
    var
      setinstr : taicpu;
      dstref : treference;
    begin
      dstref:=ref;
      make_simple_ref(list,dstref);
      { clear the whole destination when it is wider than the byte
        written by SETcc }
      if (size<>OS_8) and (size<>OS_S8) then
        a_load_const_ref(list,size,0,dstref);
      setinstr:=Taicpu.op_ref(A_SETcc,S_B,dstref);
      setinstr.setcondition(flags_to_cond(f));
      list.concat(setinstr);
{$ifndef cpu64bitalu}
      if (size=OS_S64) or (size=OS_64) then
        begin
          inc(dstref.offset,4);
          a_load_const_ref(list,OS_32,0,dstref);
        end;
{$endif cpu64bitalu}
    end;
  1676. { ************* concatcopy ************ }
  1677. procedure Tcgx86.g_concatcopy(list:TAsmList;const source,dest:Treference;len:tcgint);
  1678. const
  1679. {$if defined(cpu64bitalu)}
  1680. REGCX=NR_RCX;
  1681. REGSI=NR_RSI;
  1682. REGDI=NR_RDI;
  1683. copy_len_sizes = [1, 2, 4, 8];
  1684. push_segment_size = S_L;
  1685. {$elseif defined(cpu32bitalu)}
  1686. REGCX=NR_ECX;
  1687. REGSI=NR_ESI;
  1688. REGDI=NR_EDI;
  1689. copy_len_sizes = [1, 2, 4];
  1690. push_segment_size = S_L;
  1691. {$elseif defined(cpu16bitalu)}
  1692. REGCX=NR_CX;
  1693. REGSI=NR_SI;
  1694. REGDI=NR_DI;
  1695. copy_len_sizes = [1, 2];
  1696. push_segment_size = S_W;
  1697. {$endif}
  1698. type copymode=(copy_move,copy_mmx,copy_string);
  1699. var srcref,dstref:Treference;
  1700. r,r0,r1,r2,r3:Tregister;
  1701. helpsize:tcgint;
  1702. copysize:byte;
  1703. cgsize:Tcgsize;
  1704. cm:copymode;
  1705. begin
  1706. cm:=copy_move;
  1707. helpsize:=3*sizeof(aword);
  1708. if cs_opt_size in current_settings.optimizerswitches then
  1709. helpsize:=2*sizeof(aword);
  1710. if (cs_mmx in current_settings.localswitches) and
  1711. not(pi_uses_fpu in current_procinfo.flags) and
  1712. ((len=8) or (len=16) or (len=24) or (len=32)) then
  1713. cm:=copy_mmx;
  1714. if (len>helpsize) then
  1715. cm:=copy_string;
  1716. if (cs_opt_size in current_settings.optimizerswitches) and
  1717. not((len<=16) and (cm=copy_mmx)) and
  1718. not(len in copy_len_sizes) then
  1719. cm:=copy_string;
  1720. if (source.segment<>NR_NO) or
  1721. (dest.segment<>NR_NO) then
  1722. cm:=copy_string;
  1723. case cm of
  1724. copy_move:
  1725. begin
  1726. dstref:=dest;
  1727. srcref:=source;
  1728. copysize:=sizeof(aint);
  1729. cgsize:=int_cgsize(copysize);
  1730. while len<>0 do
  1731. begin
  1732. if len<2 then
  1733. begin
  1734. copysize:=1;
  1735. cgsize:=OS_8;
  1736. end
  1737. else if len<4 then
  1738. begin
  1739. copysize:=2;
  1740. cgsize:=OS_16;
  1741. end
  1742. {$if defined(cpu32bitalu) or defined(cpu64bitalu)}
  1743. else if len<8 then
  1744. begin
  1745. copysize:=4;
  1746. cgsize:=OS_32;
  1747. end
  1748. {$endif cpu32bitalu or cpu64bitalu}
  1749. {$ifdef cpu64bitalu}
  1750. else if len<16 then
  1751. begin
  1752. copysize:=8;
  1753. cgsize:=OS_64;
  1754. end
  1755. {$endif}
  1756. ;
  1757. dec(len,copysize);
  1758. r:=getintregister(list,cgsize);
  1759. a_load_ref_reg(list,cgsize,cgsize,srcref,r);
  1760. a_load_reg_ref(list,cgsize,cgsize,r,dstref);
  1761. inc(srcref.offset,copysize);
  1762. inc(dstref.offset,copysize);
  1763. end;
  1764. end;
  1765. copy_mmx:
  1766. begin
  1767. dstref:=dest;
  1768. srcref:=source;
  1769. r0:=getmmxregister(list);
  1770. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r0,nil);
  1771. if len>=16 then
  1772. begin
  1773. inc(srcref.offset,8);
  1774. r1:=getmmxregister(list);
  1775. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r1,nil);
  1776. end;
  1777. if len>=24 then
  1778. begin
  1779. inc(srcref.offset,8);
  1780. r2:=getmmxregister(list);
  1781. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r2,nil);
  1782. end;
  1783. if len>=32 then
  1784. begin
  1785. inc(srcref.offset,8);
  1786. r3:=getmmxregister(list);
  1787. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r3,nil);
  1788. end;
  1789. a_loadmm_reg_ref(list,OS_M64,OS_M64,r0,dstref,nil);
  1790. if len>=16 then
  1791. begin
  1792. inc(dstref.offset,8);
  1793. a_loadmm_reg_ref(list,OS_M64,OS_M64,r1,dstref,nil);
  1794. end;
  1795. if len>=24 then
  1796. begin
  1797. inc(dstref.offset,8);
  1798. a_loadmm_reg_ref(list,OS_M64,OS_M64,r2,dstref,nil);
  1799. end;
  1800. if len>=32 then
  1801. begin
  1802. inc(dstref.offset,8);
  1803. a_loadmm_reg_ref(list,OS_M64,OS_M64,r3,dstref,nil);
  1804. end;
  1805. end
  1806. else {copy_string, should be a good fallback in case of unhandled}
  1807. begin
  1808. getcpuregister(list,REGDI);
  1809. if (dest.segment=NR_NO) then
  1810. begin
  1811. a_loadaddr_ref_reg(list,dest,REGDI);
  1812. {$ifdef volatile_es}
  1813. list.concat(taicpu.op_reg(A_PUSH,push_segment_size,NR_DS));
  1814. list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_ES));
  1815. {$endif volatile_es}
  1816. end
  1817. else
  1818. begin
  1819. dstref:=dest;
  1820. dstref.segment:=NR_NO;
  1821. a_loadaddr_ref_reg(list,dstref,REGDI);
  1822. {$ifndef volatile_es}
  1823. list.concat(taicpu.op_reg(A_PUSH,push_segment_size,NR_ES));
  1824. {$endif not volatile_es}
  1825. list.concat(taicpu.op_reg(A_PUSH,push_segment_size,dest.segment));
  1826. list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_ES));
  1827. end;
  1828. getcpuregister(list,REGSI);
  1829. if (source.segment=NR_NO) then
  1830. a_loadaddr_ref_reg(list,source,REGSI)
  1831. else
  1832. begin
  1833. srcref:=source;
  1834. srcref.segment:=NR_NO;
  1835. a_loadaddr_ref_reg(list,srcref,REGSI);
  1836. list.concat(taicpu.op_reg(A_PUSH,S_L,NR_DS));
  1837. list.concat(taicpu.op_reg(A_PUSH,S_L,source.segment));
  1838. list.concat(taicpu.op_reg(A_POP,S_L,NR_DS));
  1839. end;
  1840. getcpuregister(list,REGCX);
  1841. {$if defined(i8086) or defined(i386)}
  1842. list.concat(Taicpu.op_none(A_CLD,S_NO));
  1843. {$endif i8086 or i386}
  1844. if (cs_opt_size in current_settings.optimizerswitches) and
  1845. (len>sizeof(aint)+(sizeof(aint) div 2)) then
  1846. begin
  1847. a_load_const_reg(list,OS_INT,len,REGCX);
  1848. list.concat(Taicpu.op_none(A_REP,S_NO));
  1849. list.concat(Taicpu.op_none(A_MOVSB,S_NO));
  1850. end
  1851. else
  1852. begin
  1853. helpsize:=len div sizeof(aint);
  1854. len:=len mod sizeof(aint);
  1855. if helpsize>1 then
  1856. begin
  1857. a_load_const_reg(list,OS_INT,helpsize,REGCX);
  1858. list.concat(Taicpu.op_none(A_REP,S_NO));
  1859. end;
  1860. if helpsize>0 then
  1861. begin
  1862. {$if defined(cpu64bitalu)}
  1863. list.concat(Taicpu.op_none(A_MOVSQ,S_NO))
  1864. {$elseif defined(cpu32bitalu)}
  1865. list.concat(Taicpu.op_none(A_MOVSD,S_NO));
  1866. {$elseif defined(cpu16bitalu)}
  1867. list.concat(Taicpu.op_none(A_MOVSW,S_NO));
  1868. {$endif}
  1869. end;
  1870. if len>=4 then
  1871. begin
  1872. dec(len,4);
  1873. list.concat(Taicpu.op_none(A_MOVSD,S_NO));
  1874. end;
  1875. if len>=2 then
  1876. begin
  1877. dec(len,2);
  1878. list.concat(Taicpu.op_none(A_MOVSW,S_NO));
  1879. end;
  1880. if len=1 then
  1881. list.concat(Taicpu.op_none(A_MOVSB,S_NO));
  1882. end;
  1883. ungetcpuregister(list,REGCX);
  1884. ungetcpuregister(list,REGSI);
  1885. ungetcpuregister(list,REGDI);
  1886. if (source.segment<>NR_NO) then
  1887. list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_DS));
  1888. {$ifndef volatile_es}
  1889. if (dest.segment<>NR_NO) then
  1890. list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_ES));
  1891. {$endif not volatile_es}
  1892. end;
  1893. end;
  1894. end;
  1895. {****************************************************************************
  1896. Entry/Exit Code Helpers
  1897. ****************************************************************************}
  { Emits the profiling hook of the current procedure: a call to an mcount
    routine whose name is chosen by target_info.system. Targets that do not
    appear in the case statement get no code at all.
    list - assembler list that receives the generated code }
  procedure tcgx86.g_profilecode(list : TAsmList);
    var
      counterlabel : tasmlabel;
      mcountprefix : String[4];
    begin
      case target_info.system of
        { targets that only need a plain call }
        system_i386_linux:
          a_call_name(list,target_info.Cprefix+'mcount',false);
        system_i386_go32v2,
        system_i386_watcom:
          a_call_name(list,'MCOUNT',false);
        system_x86_64_linux,
        system_x86_64_darwin:
          a_call_name(list,'mcount',false);
        { targets that get a per-procedure data word and an EDX save/restore
          around the call }
  {$ifndef NOTARGETWIN}
        system_i386_win32,
  {$endif}
        system_i386_freebsd,
        system_i386_netbsd,
        // system_i386_openbsd,
        system_i386_wdosx :
          begin
            { text placed between the C prefix and 'mcount'
              (system_i386_openbsd would use '.', but it is disabled above) }
            if target_info.system=system_i386_freebsd then
              mcountprefix:='.'
            else if target_info.system=system_i386_netbsd then
              mcountprefix:='__'
            else
              mcountprefix:='';

            { data section: a 32 bit zero constant labelled counterlabel }
            current_asmdata.getaddrlabel(counterlabel);
            new_section(list,sec_data,lower(current_procinfo.procdef.mangledname),sizeof(pint));
            list.concat(Tai_label.Create(counterlabel));
            list.concat(Tai_const.Create_32bit(0));

            { code section: EDX is pushed, receives the counterlabel operand
              through a MOV, and is popped again after the call }
            new_section(list,sec_code,lower(current_procinfo.procdef.mangledname),0);
            list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDX));
            list.concat(Taicpu.Op_sym_ofs_reg(A_MOV,S_L,counterlabel,0,NR_EDX));
            a_call_name(list,target_info.Cprefix+mcountprefix+'mcount',false);
            list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
          end;
      end;
    end;
  { Emits code that lowers the stack pointer by localsize bytes. Nothing is
    emitted when localsize is not positive.
    On the Windows targets tested below, a request of at least
    winstackpagesize bytes is split up so that the stack is written to once
    per page while it grows; every other request becomes a single SUB on
    NR_STACK_POINTER_REG.
    list      - assembler list that receives the instructions
    localsize - number of bytes to reserve }
  1942. procedure tcgx86.g_stackpointer_alloc(list : TAsmList;localsize : longint);
  1943. {$ifdef x86}
  1944. {$ifndef NOTARGETWIN}
        { these locals are only used by the Windows page-by-page paths below }
  1945. var
  1946. href : treference;
  1947. i : integer;
  1948. again : tasmlabel;
  1949. {$endif NOTARGETWIN}
  1950. {$endif x86}
  1951. begin
  1952. if localsize>0 then
  1953. begin
  1954. {$ifdef i386}
  1955. {$ifndef NOTARGETWIN}
  1956. { windows guards only a few pages for stack growing,
  1957. so we have to access every page first }
  1958. if (target_info.system in [system_i386_win32,system_i386_wince]) and
  1959. (localsize>=winstackpagesize) then
  1960. begin
        { at most five whole pages: straight-line code, no loop }
  1961. if localsize div winstackpagesize<=5 then
  1962. begin
        { reserve everything except the last 4 bytes; the PUSH after the
          loop accounts for those }
  1963. list.concat(Taicpu.Op_const_reg(A_SUB,S_L,localsize-4,NR_ESP));
  1964. for i:=1 to localsize div winstackpagesize do
  1965. begin
        { one 32 bit store of EAX per page, at ESP+localsize-i*winstackpagesize }
  1966. reference_reset_base(href,NR_ESP,localsize-i*winstackpagesize,4);
  1967. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  1968. end;
  1969. list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
  1970. end
  1971. else
  1972. begin
        { more than five pages: emit a loop that is counted down in EDI }
  1973. current_asmdata.getjumplabel(again);
  1974. getcpuregister(list,NR_EDI);
        { EDI is saved on the stack first and reloaded after the loop }
  1975. list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
  1976. list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
  1977. a_label(list,again);
        { per iteration: SUB of winstackpagesize-4 plus a 4 byte PUSH, i.e.
          ESP moves down by one page and the PUSH writes to it }
  1978. list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP));
  1979. list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
  1980. list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI));
  1981. a_jmp_cond(list,OC_NE,again);
        { remainder below one page; the -4 offsets the 4 bytes already taken
          by the PUSH of EDI, so the total adjustment equals localsize }
  1982. list.concat(Taicpu.op_const_reg(A_SUB,S_L,localsize mod winstackpagesize - 4,NR_ESP));
        { the saved EDI now lies localsize-4 bytes above ESP: load it back }
  1983. reference_reset_base(href,NR_ESP,localsize-4,4);
  1984. list.concat(Taicpu.op_ref_reg(A_MOV,S_L,href,NR_EDI));
  1985. ungetcpuregister(list,NR_EDI);
  1986. end
  1987. end
        { NOTE: this else is completed by the first statement that follows
          the conditional sections, in this build the plain SUB at the end }
  1988. else
  1989. {$endif NOTARGETWIN}
  1990. {$endif i386}
  1991. {$ifdef x86_64}
  1992. {$ifndef NOTARGETWIN}
  1993. { windows guards only a few pages for stack growing,
  1994. so we have to access every page first }
  1995. if (target_info.system=system_x86_64_win64) and
  1996. (localsize>=winstackpagesize) then
  1997. begin
        { at most five whole pages: straight-line code, no loop }
  1998. if localsize div winstackpagesize<=5 then
  1999. begin
        { the whole area is reserved by one SUB, then EAX is stored once per
          page and finally at the new stack top }
  2000. list.concat(Taicpu.Op_const_reg(A_SUB,S_Q,localsize,NR_RSP));
  2001. for i:=1 to localsize div winstackpagesize do
  2002. begin
  2003. reference_reset_base(href,NR_RSP,localsize-i*winstackpagesize+4,4);
  2004. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  2005. end;
  2006. reference_reset_base(href,NR_RSP,0,4);
  2007. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  2008. end
  2009. else
  2010. begin
        { more than five pages: emit a loop that is counted down in R10 }
  2011. current_asmdata.getjumplabel(again);
  2012. getcpuregister(list,NR_R10);
  2013. list.concat(Taicpu.op_const_reg(A_MOV,S_Q,localsize div winstackpagesize,NR_R10));
  2014. a_label(list,again);
        { per iteration: lower RSP by one page and store EAX at the new top }
  2015. list.concat(Taicpu.op_const_reg(A_SUB,S_Q,winstackpagesize,NR_RSP));
  2016. reference_reset_base(href,NR_RSP,0,4);
  2017. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  2018. list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10));
  2019. a_jmp_cond(list,OC_NE,again);
        { remainder below one page }
  2020. list.concat(Taicpu.op_const_reg(A_SUB,S_Q,localsize mod winstackpagesize,NR_RSP));
  2021. ungetcpuregister(list,NR_R10);
  2022. end
  2023. end
        { NOTE: this else is completed by the plain SUB that follows the
          conditional sections }
  2024. else
  2025. {$endif NOTARGETWIN}
  2026. {$endif x86_64}
        { default path: a single SUB of localsize on the stack pointer }
  2027. list.concat(Taicpu.Op_const_reg(A_SUB,tcgsize2opsize[OS_ADDR],localsize,NR_STACK_POINTER_REG));
  2028. end;
  2029. end;
  { Emits the procedure entry code into list.
    First come the register saves for interrupt procedures (i8086 and i386
    builds only). Then, unless nostackframe is set, the frame pointer setup,
    the matching CFI / Win64 SEH directives and the allocation of the local
    stack area follow.
    list         - assembler list that receives the generated code
    localsize    - requested size of the local area in bytes; it may be
                   replaced and aligned below before it is allocated
    nostackframe - when true, nothing beyond the interrupt saves is emitted }
  2030. procedure tcgx86.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  2031. var
  2032. stackmisalignment: longint;
  2033. para: tparavarsym;
  2034. {$ifdef i8086}
  2035. dgroup: treference;
  2036. {$endif i8086}
  2037. begin
  2038. {$ifdef i8086}
  2039. { interrupt support for i8086 }
  2040. if po_interrupt in current_procinfo.procdef.procoptions then
  2041. begin
  2042. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_AX));
  2043. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_BX));
  2044. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_CX));
  2045. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DX));
  2046. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_SI));
  2047. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DI));
  2048. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DS));
  2049. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_ES));
        { DS is loaded through AX from a reference with refaddr addr_dgroup }
  2050. reference_reset(dgroup,0);
  2051. dgroup.refaddr:=addr_dgroup;
  2052. list.concat(Taicpu.Op_ref_reg(A_MOV,S_W,dgroup,NR_AX));
  2053. list.concat(Taicpu.Op_reg_reg(A_MOV,S_W,NR_AX,NR_DS));
  2054. end;
  2055. {$endif i8086}
  2056. {$ifdef i386}
  2057. { interrupt support for i386 }
  2058. if (po_interrupt in current_procinfo.procdef.procoptions) and
  2059. { this messes up stack alignment }
  2060. not(target_info.system in [system_i386_darwin,system_i386_iphonesim,system_i386_android]) then
  2061. begin
  2062. { .... also the segment registers }
  2063. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_GS));
  2064. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_FS));
  2065. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_ES));
  2066. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DS));
  2067. { save the registers of an interrupt procedure }
  2068. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDI));
  2069. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_ESI));
  2070. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDX));
  2071. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_ECX));
  2072. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EBX));
  2073. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EAX));
  2074. end;
  2075. {$endif i386}
  2076. { save old framepointer }
  2077. if not nostackframe then
  2078. begin
        { stackmisalignment counts the bytes already on the stack that the
          alignment computation further down has to take into account }
  2079. { return address }
  2080. stackmisalignment := sizeof(pint);
  2081. list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
        { the stack pointer itself serves as frame pointer: only the message
          is issued and nothing is pushed }
  2082. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  2083. CGmessage(cg_d_stackframe_omited)
  2084. else
  2085. begin
  2086. { push <frame_pointer> }
  2087. inc(stackmisalignment,sizeof(pint));
  2088. include(rg[R_INTREGISTER].preserved_by_proc,RS_FRAME_POINTER_REG);
  2089. list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
        { Win64: describe the push with a SEH directive and flag that this
          procedure has unwind info }
  2090. if (target_info.system=system_x86_64_win64) then
  2091. begin
  2092. list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
  2093. include(current_procinfo.flags,pi_has_unwind_info);
  2094. end;
  2095. { Return address and FP are both on stack }
  2096. current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
  2097. current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
        { ordinary procedures copy the stack pointer into the frame pointer;
          exception filters take it from their first parameter instead }
  2098. if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
  2099. list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG))
  2100. else
  2101. begin
  2102. { load framepointer from hidden $parentfp parameter }
  2103. para:=tparavarsym(current_procinfo.procdef.paras[0]);
  2104. if not (vo_is_parentfp in para.varoptions) then
  2105. InternalError(201201142);
        { the parameter must arrive in exactly one register }
  2106. if (para.paraloc[calleeside].location^.loc<>LOC_REGISTER) or
  2107. (para.paraloc[calleeside].location^.next<>nil) then
  2108. InternalError(201201143);
  2109. list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],
  2110. para.paraloc[calleeside].location^.register,NR_FRAME_POINTER_REG));
  2111. { Need only as much stack space as necessary to do the calls.
  2112. Exception filters don't have own local vars, and temps are 'mapped'
  2113. to the parent procedure.
  2114. maxpushedparasize is already aligned at least on x86_64. }
  2115. localsize:=current_procinfo.maxpushedparasize;
  2116. end;
  2117. current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
  2118. {
  2119. TODO: current framepointer handling is not compatible with Win64 at all:
  2120. Win64 expects FP to point to the top or into the middle of local area.
  2121. In FPC it points to the bottom, making it impossible to generate
  2122. UWOP_SET_FPREG unwind code if local area is > 240 bytes.
  2123. So for now pretend we never have a framepointer.
  2124. }
  2125. end;
        { Space is allocated when a local area is requested. It is also
          allocated when the target aligns the stack to more than
          sizeof(pint) and the procedure makes calls or is an assembler
          routine, so that only alignment padding may end up being reserved }
  2126. { allocate stackframe space }
  2127. if (localsize<>0) or
  2128. ((target_info.stackalign>sizeof(pint)) and
  2129. (stackmisalignment <> 0) and
  2130. ((pi_do_call in current_procinfo.flags) or
  2131. (po_assembler in current_procinfo.procdef.procoptions))) then
  2132. begin
        { pad localsize so that localsize+stackmisalignment becomes a multiple
          of target_info.stackalign (assumes align rounds up - TODO confirm) }
  2133. if target_info.stackalign>sizeof(pint) then
  2134. localsize := align(localsize+stackmisalignment,target_info.stackalign)-stackmisalignment;
  2135. cg.g_stackpointer_alloc(list,localsize);
        { without a separate frame pointer the CFA offset is the local area
          plus the sizeof(pint) counted for the return address above }
  2136. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  2137. current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
        { store the size that was actually allocated in the procinfo }
  2138. current_procinfo.final_localsize:=localsize;
        { Win64: a non-empty allocation gets a SEH stackalloc directive; the
          unwind info flag is set in either case }
  2139. if (target_info.system=system_x86_64_win64) then
  2140. begin
  2141. if localsize<>0 then
  2142. list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
  2143. include(current_procinfo.flags,pi_has_unwind_info);
  2144. end;
  2145. end;
  2146. end;
  2147. end;
  { Produces the overflow check code, if it is required.
    Nothing is emitted unless cs_check_overflow is set in the local switches.
    The generated code is a conditional jump over a call to FPC_OVERFLOW:
    pointer types and the u*bit, uchar and pasbool* ordinal types listed
    below jump on C_NB, every other type jumps on C_NO.
    list - assembler list that receives the code
    l    - not referenced by this implementation
    def  - type definition that selects the jump condition }
  procedure tcgx86.g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);
    var
      skiplabel : tasmlabel;
      skipjump : taicpu;
      skipcond : TAsmCond;
    begin
      if cs_check_overflow in current_settings.localswitches then
        begin
          current_asmdata.getjumplabel(skiplabel);

          { choose the condition under which the error call is skipped }
          if (def.typ=pointerdef) or
             ((def.typ=orddef) and
              (torddef(def).ordtype in [u8bit,u16bit,u32bit,u64bit,uchar,
                                        pasbool8,pasbool16,pasbool32,pasbool64])) then
            skipcond:=C_NB
          else
            skipcond:=C_NO;

          { Jcc skiplabel / call FPC_OVERFLOW / skiplabel: }
          skipjump:=Taicpu.Op_Sym(A_Jcc,S_NO,skiplabel);
          skipjump.SetCondition(skipcond);
          skipjump.is_jmp:=true;
          list.concat(skipjump);
          a_call_name(list,'FPC_OVERFLOW',false);
          a_label(list,skiplabel);
        end;
    end;
  { Emits a wrapper that transfers control to the external symbol
    externalname.
    On system_i386_darwin the inherited implementation is used. Every other
    target gets a JMP through a symbol reference whose refaddr is addr_pic
    when PIC code is being created (with two target exceptions, see below)
    and addr_full otherwise.
    list         - assembler list that receives the code
    procdef      - only handed on to the inherited implementation
    externalname - name of the symbol to jump to }
  procedure tcgx86.g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string);
    var
      target : treference;
      pic_reference : boolean;
    begin
      if target_info.system<>system_i386_darwin then
        begin
          reference_reset_symbol(target,current_asmdata.RefAsmSymbol(externalname),0,sizeof(pint));

          { create pic'ed?
            darwin/x86_64's assembler doesn't want @PLT after call symbols }
          pic_reference:=(cs_create_pic in current_settings.moduleswitches) and
                         not(target_info.system in [system_x86_64_darwin,system_i386_iphonesim]);
          if pic_reference then
            target.refaddr:=addr_pic
          else
            target.refaddr:=addr_full;

          list.concat(taicpu.op_ref(A_JMP,S_NO,target));
        end
      else
        { a_jmp_name jumps to a stub which is always pic-safe on darwin }
        inherited g_external_wrapper(list,procdef,externalname);
    end;
  2194. end.