nx86inl.pas 61 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 specific inline node: the first_* methods are the first pass
    overrides, the second_* methods emit the actual code. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { called by first_round_real, first_trunc_real and first_frac_real;
      the implementation in this class is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    function pass_typecheck_cpu: tnode; override;

    { first pass override
      so that the code generator will actually generate
      these nodes.
    }
    function first_cpu: tnode; override;
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real: tnode; override;
    function first_int_real: tnode; override;
    function first_minmax: tnode; override;
    function simplify(forinline: boolean): tnode; override;

    { second pass override to generate these nodes }
    procedure pass_generate_code_cpu; override;
    procedure second_IncludeExclude; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_popcnt; override;
    procedure second_fma; override;
    procedure second_frac_real; override;
    procedure second_int_real; override;
    procedure second_high; override;
    procedure second_minmax; override;
  private
    { runs the second pass on lnode and brings its value into an FPU
      register; used by the x87 based second_* methods }
    procedure load_fpu_location(lnode: tnode);
  end;
  73. implementation
  74. uses
  75. systems,
  76. globtype,globals,
  77. verbose,compinnr,fmodule,
  78. defutil,
  79. aasmbase,aasmdata,aasmcpu,
  80. symconst,symtype,symdef,symcpu,
  81. ncnv,
  82. htypechk,
  83. cgbase,pass_1,pass_2,
  84. cpuinfo,cpubase,nutils,
  85. ncal,ncgutil,nld,ncon,
  86. tgobj,
  87. cga,cgutils,cgx86,cgobj,hlcgobj;
  88. {*****************************************************************************
  89. TX86INLINENODE
  90. *****************************************************************************}
  { Virtual hook without any effect in this class. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { intentionally empty: only makes a difference for x86_64 }
    end;
  { Type checks the x86 specific intrinsics: validates the parameter count
    where the intrinsic takes parameters and sets resultdef. Intrinsics not
    listed here (nor in x86mmtype.inc) are passed on to the inherited
    implementation. Returns nil for everything handled locally. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads: one parameter, result type depends on the width }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl:
          begin
            CheckParameters(1);
            case inlinenumber of
              in_x86_inportb:
                resultdef:=u8inttype;
              in_x86_inportw:
                resultdef:=u16inttype;
              else
                resultdef:=s32inttype;
            end;
          end;
        { port writes: two parameters, no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;
        { segment register reads }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}
        { include automatically generated code }
{$i x86mmtype.inc}
        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86 specific intrinsics: only records the expected
    location of the result. Unknown intrinsics go to the inherited
    implementation. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics without a result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;
        { intrinsics delivering their result in an integer register }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;
        { include automatically generated code }
{$i x86mmfirst.inc}
        else
          Result:=inherited first_cpu;
      end;
    end;
  { pi is generated inline (result in an FPU register) only when the best
    real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
    end;
  { arctan is generated inline (result in an FPU register) only when the
    best real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype<cpu_386 then
        begin
          Result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        Result:=inherited
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { abs() on a real is always generated inline; the result lives in an MM
    register when the vector FPU is used for resultdef, else in an FPU
    register. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { sqr() on a real is always generated inline; the result lives in an MM
    register when the vector FPU is used for resultdef, else in an FPU
    register. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { sqrt() on a real is always generated inline; the result lives in an MM
    register when the vector FPU is used for resultdef, else in an FPU
    register. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { ln is generated inline (result in an FPU register) only when the best
    real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
    end;
  { cos is generated inline (result in an FPU register) only when the best
    real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          Result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        Result:=inherited
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { sin is generated inline (result in an FPU register) only when the best
    real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          Result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        Result:=inherited
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { round() is always generated inline. The result is expected in memory,
    except on x86_64 with a vector FPU operand where it ends up in an
    integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      { must run first: it may replace left }
      maybe_remove_round_trunc_typeconv;
      Result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { trunc() falls back to the inherited handling when optimizing for size
    (on x86_64 only if the operand does not use the vector FPU). Otherwise
    it is generated inline with the same expected locations as round(). }
  function tx86inlinenode.first_trunc_real: tnode;
    var
      use_inherited : boolean;
    begin
      { must run first: it may replace left }
      maybe_remove_round_trunc_typeconv;
      use_inherited:=(cs_opt_size in current_settings.optimizerswitches);
{$ifdef x86_64}
      use_inherited:=use_inherited and not(use_vectorfpu(left.resultdef));
{$endif x86_64}
      if use_inherited then
        begin
          Result:=inherited;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { popcnt is generated inline (result in an integer register) when the
    selected CPU type has the POPCNT capability; on i386 additionally only
    for operands that are not 64 bit. i8086 and all remaining cases use
    the inherited handling. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
      Result:=nil;
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
        then
        begin
          expectloc:=LOC_REGISTER;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { fma is generated inline (result in an MM register) when the selected
    FPU type offers FMA or FMA4 and the result is a single or a double;
    everything else (and every i8086 build) uses the inherited handling. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
         ((is_double(resultdef)) or (is_single(resultdef))) then
        begin
          Result:=nil;
          expectloc:=LOC_MMREGISTER;
          exit;
        end;
{$endif i8086}
      Result:=inherited first_fma;
    end;
  { frac() is generated inline (result in an MM register) for singles and
    doubles when the FPU type is at least SSE4.1; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not((is_double(resultdef)) or (is_single(resultdef))) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { int() is generated inline (result in an MM register) for singles and
    doubles when the FPU type is at least SSE4.1; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not((is_double(resultdef)) or (is_single(resultdef))) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { min/max is generated inline (result in an MM register) for singles and
    doubles. On i386 this additionally requires an FPU type of at least SSE
    (single) respectively SSE2 (double). i8086 and all remaining cases use
    the inherited handling. }
  function tx86inlinenode.first_minmax: tnode;
    begin
{$ifndef i8086}
      if
{$ifdef i386}
         ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
         ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
         ((is_double(resultdef)) or (is_single(resultdef)))
{$endif i386}
        then
        begin
          Result:=nil;
          expectloc:=LOC_MMREGISTER;
          exit;
        end;
{$endif i8086}
      Result:=inherited first_minmax;
    end;
  { For int() with an FPU type of at least SSE4.1: if the operand is a
    non-explicit type conversion of a single or double value, the type
    conversion is dropped and a type checked copy of this node, operating
    directly on the inner value, is returned. Every other case is handled
    by the inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      inner : tnode;
      strip_conv : boolean;
    begin
      strip_conv:=false;
      inner:=nil;
      if (current_settings.fputype>=fpu_sse41) and
         (inlinenumber=in_int_real) and
         (left.nodetype=typeconvn) and
         not(nf_explicit in left.flags) then
        begin
          inner:=ttypeconvnode(left).left;
          strip_conv:=(inner.resultdef.typ=floatdef) and
            ((is_double(inner.resultdef)) or (is_single(inner.resultdef)));
        end;

      if not strip_conv then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { get rid of the type conversion: detach its operand before freeing it }
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=inner;
      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=inner.resultdef;
      typecheckpass(Result);
    end;
  { Generates the code for the x86 specific intrinsics (port I/O, CLI/STI,
    segment register reads and whatever x86mmsecond.inc adds); all other
    intrinsics are passed on to the inherited implementation.

    NOTE(review): paraarray, i, op and several of the nested helpers are not
    referenced by the code visible here; they are presumably used by the
    included x86mmsecond.inc, so their names must stay as they are. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN instruction reading into dreg (operand size dsize, type
      dtype) and copies the value to a fresh integer register that becomes
      the location of this node. A constant port number in 0..255 is
      encoded as an immediate, everything else is passed in DX. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode: tnode;
      begin
        portnode:=left;
        secondpass(portnode);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnode.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { common tail: move the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Emits an OUT instruction writing dreg (operand size dsize, type
      dtype). The data is the first entry of the parameter list hanging
      off left, the port number the entry after it. A constant port number
      in 0..255 is encoded as an immediate, everything else is passed in
      DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode, datanode: tnode;
        port_is_imm8: boolean;
      begin
        portnode:=tcallparanode(tcallparanode(left).right).left;
        datanode:=tcallparanode(left).left;
        secondpass(datanode);
        secondpass(portnode);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,datanode.resultdef,dtype,datanode.location,dreg);
        port_is_imm8:=(portnode.location.loc=LOC_CONSTANT) and
          (portnode.location.value>=0) and
          (portnode.location.value<=255);
        if not port_is_imm8 then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnode.location.value));
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Copies the segment register segreg with a MOV into a fresh integer
      register, which becomes the location of this node. }
    procedure get_segreg(segreg:tregister);
      var
        ressize: tcgsize;
      begin
        ressize:=def_cgsize(resultdef);
        location_reset(location,LOC_REGISTER,ressize);
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[ressize],segreg,location.register));
      end;

    { Returns the value of n if it is a constant integer node; otherwise
      reports type_e_constant_expr_expected and returns 0. }
    function GetConstInt(n: tnode): longint;
      begin
        if not is_constintnode(n) then
          begin
            Message(type_e_constant_expr_expected);
            Result:=0;
            exit;
          end;
        Result:=tordconstnode(n).value.svalue;
      end;

    { Stores the first count parameters in paraarray[1..count]. The
      parameter list is walked from left via nextpara while the array is
      filled from index count down to 1. A single parameter that is not
      wrapped in a tcallparanode is stored as is. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        if (count=1) and
           (not (left is tcallparanode)) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        idx:=count;
        while idx>=1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
            dec(idx);
          end;
      end;

    { Makes sure l is in an MMX register: nothing happens if it already is
      a LOC_MMXREGISTER, or a LOC_CMMXREGISTER while maybeconst is set;
      otherwise the value is loaded into a newly allocated MMX register. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        mmxreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        mmxreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=mmxreg;
      end;

    { Turns loc into a memory reference: references are left untouched, any
      other location is (if needed) forced into a 32 bit integer register
      which then serves as base register of a new reference. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port reads }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);
        { port writes }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);
        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);
{$i x86mmsecond.inc}
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Emits FLDPI, accounts for the additional FPU stack entry and reports
    the result in the FPU result register. }
  procedure tx86inlinenode.second_pi;
    begin
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
    end;
  560. { load the value of lnode into an FPU register }
  { Sets the location of this node to the FPU result register, runs the
    second pass on lnode and brings its value into an FPU register:
    nothing is emitted if it already is in a LOC_FPUREGISTER, constant FPU
    registers and references are loaded, MM registers are converted via
    location_force_fpureg. Any other location is an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    var
      srcsize : tcgsize;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing to load };
        LOC_CFPUREGISTER:
          begin
            srcsize:=lnode.location.size;
            cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,srcsize,srcsize,
              lnode.location.register,location.register);
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            srcsize:=lnode.location.size;
            cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,srcsize,srcsize,
              lnode.location.reference,location.register);
          end;
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            { take over the MM location and let the code generator move it }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Loads the operand into the FPU, then emits FLD1 followed by FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { second operand of FPATAN }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { abs() for reals. Without vector FPU the operand is loaded into the FPU
    and FABS is emitted. With vector FPU the operand is ANDed with the
    external mask FPC_ABSMASK_SINGLE/FPC_ABSMASK_DOUBLE; with AVX the
    result goes to a new MM register, otherwise the operand's register is
    reused as the result. }
  procedure tx86inlinenode.second_abs_real;

    { true if the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
          (target_info.system in systems_indirect_var_imports);
      end;

    var
      maskref : treference;
      masksym : tasmsymbol;
      maskname : string;
      sse_op, avx_op : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if not UseAVX then
        location:=left.location
      else
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end;

      { pick the mask symbol and the opcodes for the float type }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            maskname:='FPC_ABSMASK_SINGLE';
            sse_op:=A_ANDPS;
            avx_op:=A_VANDPS;
          end;
        s64real:
          begin
            maskname:='FPC_ABSMASK_DOUBLE';
            sse_op:=A_ANDPD;
            avx_op:=A_VANDPD;
          end;
        else
          internalerror(200506081);
      end;

      masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
      reference_reset_symbol(maskref,masksym,0,4,[]);
      current_module.add_extern_asmsym(masksym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avx_op,S_XMM,maskref,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sse_op,S_XMM,maskref,location.register));
    end;
  { round(). On x86_64 with a vector FPU operand a (V)CVTSS2SI/(V)CVTSD2SI
    into a 64 bit integer register is emitted. Otherwise the operand is
    loaded into the FPU and stored with FISTP into a temp which becomes the
    location of this node. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvt_op : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { select the conversion instruction }
          if UseAVX then
            case left.location.size of
              OS_F32:
                cvt_op:=A_VCVTSS2SI;
              OS_F64:
                cvt_op:=A_VCVTSD2SI;
              else
                internalerror(2007031402);
            end
          else
            case left.location.size of
              OS_F32:
                cvt_op:=A_CVTSS2SI;
              OS_F64:
                cvt_op:=A_CVTSD2SI;
              else
                internalerror(2007031404);
            end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvt_op,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}
      { x87: store and pop into a temp }
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { trunc(). Three code paths:
    - x86_64 with a vector FPU operand: (V)CVTTSS2SI/(V)CVTTSD2SI into a
      64 bit integer register;
    - FPU type of at least SSE3: load into the FPU and store with FISTTP;
    - otherwise: save the FPU control word, load a copy with the bits
      $0f00 set, store with FISTP and restore the saved control word. }
  procedure tx86inlinenode.second_trunc_real;
    var
      saved_cw,trunc_cw : treference;
{$ifdef x86_64}
      cvt_op : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { select the truncating conversion instruction }
          if UseAVX then
            case left.location.size of
              OS_F32:
                cvt_op:=A_VCVTTSS2SI;
              OS_F64:
                cvt_op:=A_VCVTTSD2SI;
              else
                internalerror(2007031401);
            end
          else
            case left.location.size of
              OS_F32:
                cvt_op:=A_CVTTSS2SI;
              OS_F64:
                cvt_op:=A_CVTTSD2SI;
              else
                internalerror(2007031403);
            end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvt_op,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}

      if (current_settings.fputype>=fpu_sse3) then
        begin
          { FISTTP needs no control word changes }
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          exit;
        end;

      { two 16 bit temps: the saved control word and the modified copy }
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,saved_cw);
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunc_cw);
{$ifdef i8086}
      if current_settings.cputype<=cpu_286 then
        begin
          emit_ref(A_FSTCW,S_NO,trunc_cw);
          emit_ref(A_FSTCW,S_NO,saved_cw);
          emit_none(A_FWAIT,S_NO);
        end
      else
{$endif i8086}
        begin
          emit_ref(A_FNSTCW,S_NO,trunc_cw);
          emit_ref(A_FNSTCW,S_NO,saved_cw);
        end;
      { NOTE(review): $0f00 presumably selects round-towards-zero together
        with the precision control bits - confirm against the x87 docs }
      emit_const_ref(A_OR,S_W,$0f00,trunc_cw);
      load_fpu_location(left);
      emit_ref(A_FLDCW,S_NO,trunc_cw);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      { back to the original control word }
      emit_ref(A_FLDCW,S_NO,saved_cw);
      emit_none(A_FWAIT,S_NO);
      tg.UnGetTemp(current_asmdata.CurrAsmList,saved_cw);
      tg.UnGetTemp(current_asmdata.CurrAsmList,trunc_cw);
    end;
  { sqr(). Without vector FPU the operand is loaded into the FPU and
    multiplied with itself (FMUL st0,st0). With vector FPU the product is
    computed into a new MM register: with AVX by a three operand multiply,
    otherwise by copying the operand and multiplying the copy with
    itself. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          { values living in the x87 unit have to be moved to an MM register first }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { sqrt(). Without vector FPU the operand is loaded into the FPU and FSQRT
    is emitted. With vector FPU (V)SQRTSS/(V)SQRTSD computes the result
    into a new MM register. }
  procedure tx86inlinenode.second_sqrt_real;
    var
      sqrt_op : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              sqrt_op:=A_VSQRTSS;
            s64real:
              sqrt_op:=A_VSQRTSD;
            else
              internalerror(200510031);
          end;
          { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
          { using left.location.register here as 2nd parameter is crucial to break dependency chains }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrt_op,S_NO,left.location.register,left.location.register,location.register));
        end
      else
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              sqrt_op:=A_SQRTSS;
            s64real:
              sqrt_op:=A_SQRTSD;
            else
              internalerror(2005100303);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrt_op,S_NO,left.location.register,location.register));
        end;
    end;
  { Loads the operand into the FPU, then emits FLDLN2, FXCH and FYL2X.
    NOTE(review): presumably this evaluates ln(x) as ln(2)*log2(x) -
    confirm against the x87 instruction reference. }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      emit_none(A_FLDLN2,S_NO);
      { swap the constant and the operand }
      emit_none(A_FXCH,S_NO);
      emit_none(A_FYL2X,S_NO);
    end;
  { Loads the operand into the FPU and emits FCOS; i8086 builds targeting
    a CPU below the 386 use the inherited code instead. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      load_fpu_location(left);
      emit_none(A_FCOS,S_NO);
    end;
  { Loads the operand into the FPU and emits FSIN; i8086 builds targeting
    a CPU below the 386 use the inherited code instead. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      load_fpu_location(left);
      emit_none(A_FSIN,S_NO);
    end;
  { Emits PREFETCHNTA for the memory location denoted by left. On i386 and
    i8086 nothing is generated unless the selected CPU type is at least a
    Pentium 3. Operands that are not located in memory are ignored.

    Pointer checking (cs_checkpointer) is switched off on the operand's
    subtree while its code is generated and switched back on afterwards. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch on the subtree; the original passed false
            here again, which left pointer checking disabled on these nodes }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { compute the address into a register and prefetch through it }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { abs() for integers. On i8086/i386 CPU types without CMOV the result is
    computed branch free from a copy of the operand and the operand
    shifted arithmetically right by (bit size - 1): copy xor shifted,
    minus shifted. Otherwise the operand is copied twice, one copy is
    negated and moved over the other one by a CMOV with condition C_NS. }
  procedure tx86inlinenode.second_abs_long;
    var
      negreg : tregister;
      opsize : tcgsize;
      cmov : taicpu;
    begin
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          { the operand register is modified below, so a constant register is not acceptable }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          exit;
        end;
{$endif i8086 or i386}
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      negreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negreg);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
      emit_reg(A_NEG,tcgsize2opsize[opsize],negreg);
      { take the negated copy if the NEG result is not negative }
      cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negreg,location.register);
      cmov.condition:=C_NS;
      current_asmdata.CurrAsmList.concat(cmov);
    end;
  929. {*****************************************************************************
  930. INCLUDE/EXCLUDE GENERIC HANDLING
  931. *****************************************************************************}
  { Generates code for include(set,element) and exclude(set,element).
    tcallparanode(left).left is the set (its setdef supplies setbase) and
    tcallparanode(tcallparanode(left).right).left is the element.
    A constant element is handled with an OR (include) or an AND with the
    inverted bit mask (exclude). For any other element BTS (include) or
    BTR (exclude) is emitted with the element held in a register. }
  932. procedure tx86inlinenode.second_IncludeExclude;
  933. var
  934. hregister,
  935. hregister2: tregister;
  936. setbase : aint;
  937. bitsperop,l : longint;
  938. cgop : topcg;
  939. asmop : tasmop;
  940. opdef : tdef;
  941. opsize,
  942. orgsize: tcgsize;
  943. begin
  944. {$ifdef i8086}
  945. { BTS and BTR are 386+ }
  946. if current_settings.cputype < cpu_386 then
  947. begin
  948. inherited;
  949. exit;
  950. end;
  951. {$endif i8086}
  { small sets are handled in their own size, all other sets in 32 bit units }
  952. if is_smallset(tcallparanode(left).resultdef) then
  953. begin
  954. opdef:=tcallparanode(left).resultdef;
  955. opsize:=int_cgsize(opdef.size)
  956. end
  957. else
  958. begin
  959. opdef:=u32inttype;
  960. opsize:=OS_32;
  961. end;
  962. bitsperop:=(8*tcgsize2size[opsize]);
  { generate the code for the set first, then for the element }
  963. secondpass(tcallparanode(left).left);
  964. secondpass(tcallparanode(tcallparanode(left).right).left);
  965. setbase:=tsetdef(tcallparanode(left).left.resultdef).setbase;
  966. if tcallparanode(tcallparanode(left).right).left.location.loc=LOC_CONSTANT then
  967. begin
  968. { calculate bit position }
  969. l:=1 shl ((tcallparanode(tcallparanode(left).right).left.location.value-setbase) mod bitsperop);
  970. { determine operator }
  971. if inlinenumber=in_include_x_y then
  972. cgop:=OP_OR
  973. else
  974. begin
  { exclude: clear the bit by and-ing with the inverted mask }
  975. cgop:=OP_AND;
  976. l:=not(l);
  977. end;
  978. case tcallparanode(left).left.location.loc of
  979. LOC_REFERENCE :
  980. begin
  { advance the reference to the opsize sized unit which contains the bit }
  981. inc(tcallparanode(left).left.location.reference.offset,
  982. ((tcallparanode(tcallparanode(left).right).left.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
  983. cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,l,tcallparanode(left).left.location.reference);
  984. end;
  985. LOC_CSUBSETREG,
  986. LOC_CREGISTER :
  987. hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,tcallparanode(left).left.resultdef,l,tcallparanode(left).left.location);
  988. else
  989. internalerror(200405022);
  990. end;
  991. end
  992. else
  993. begin
  { 8 bit sets are processed as 32 bit operands; orgsize keeps the
    original size so the set register can be resized below }
  994. orgsize:=opsize;
  995. if opsize in [OS_8,OS_S8] then
  996. begin
  997. opdef:=u32inttype;
  998. opsize:=OS_32;
  999. end;
  1000. { determine asm operator }
  1001. if inlinenumber=in_include_x_y then
  1002. asmop:=A_BTS
  1003. else
  1004. asmop:=A_BTR;
  { load the element into a register of type opdef }
  1005. hlcg.location_force_reg(current_asmdata.CurrAsmList,tcallparanode(tcallparanode(left).right).left.location,tcallparanode(tcallparanode(left).right).left.resultdef,opdef,true);
  { NOTE(review): presumably adjusts the element register by setbase so it
    becomes a zero based bit index - confirm against the helper }
  1006. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,tcallparanode(tcallparanode(left).right).left.resultdef,tcallparanode(tcallparanode(left).right).left.location,setbase);
  1007. hregister:=tcallparanode(tcallparanode(left).right).left.location.register;
  1008. if (tcallparanode(left).left.location.loc=LOC_REFERENCE) then
  1009. emit_reg_ref(asmop,tcgsize2opsize[opsize],hregister,tcallparanode(left).left.location.reference)
  1010. else
  1011. begin
  1012. { second argument can't be an 8 bit register either }
  1013. hregister2:=tcallparanode(left).left.location.register;
  1014. if (orgsize in [OS_8,OS_S8]) then
  1015. hregister2:=cg.makeregsize(current_asmdata.CurrAsmList,hregister2,opsize);
  1016. emit_reg_reg(asmop,tcgsize2opsize[opsize],hregister,hregister2);
  1017. end;
  1018. end;
  1019. end;
  { Generates code for popcnt() using the POPCNT instruction. The operand is
    counted in its unsigned size (byte operands as 16 bit values); a result
    type of one byte gets the 8 bit sub register of the result register. }
  procedure tx86inlinenode.second_popcnt;
    var
      cntsize : tcgsize;
      must_load : boolean;
    begin
      secondpass(left);

      { there is no 8 bit POPCNT, so byte sized operands are widened }
      cntsize:=tcgsize2unsigned[left.location.size];
      if cntsize=OS_8 then
        cntsize:=OS_16;

      { POPCNT accepts a register or a memory operand of the counted size,
        everything else has to be loaded into a register first }
      must_load:=(left.location.size<>cntsize) or
        not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]);
      if must_load then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);

      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);

      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register);
      end;

      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,OS_8);
        end;
    end;
  { Generates code for the fma intrinsic with the scalar VFMADD/VFMSUB/
    VFNMADD/VFNMSUB instructions. paraarray[1] and paraarray[2] are the
    factors, paraarray[3] is the addend. Unary minus nodes on the operands
    are not evaluated but folded into the choice of the instruction.
    Raises internalerror(2014032301) if the selected FPU has neither FMA nor
    FMA4, and always on i8086. }
  1044. procedure tx86inlinenode.second_fma;
  1045. {$ifndef i8086}
  1046. const
  { opcode table, indexed by
      [product negated, third operand negated, float type, variant]
    variants 0..2 are the 231 forms, variant 3 is the 213 form; when one
    operand stays in memory the variant is the index of that operand
    (memop), in the register only case 0 or 3 is used, see below }
  1047. op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
  1048. (
  1049. { positive product }
  1050. (
  1051. { positive third operand }
  1052. ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
  1053. (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
  1054. ),
  1055. { negative third operand }
  1056. ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
  1057. (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
  1058. )
  1059. ),
  1060. { negative product }
  1061. (
  1062. { positive third operand }
  1063. ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
  1064. (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
  1065. ),
  1066. { negative third operand }
  1067. ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
  1068. (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
  1069. )
  1070. )
  1071. );
  1072. var
  1073. paraarray : array[1..3] of tnode;
  1074. memop,
  1075. i : integer;
  1076. negop3,
  1077. negproduct,
  1078. gotmem : boolean;
  1079. {$endif i8086}
  1080. begin
  1081. {$ifndef i8086}
  1082. if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
  1083. begin
  1084. negop3:=false;
  1085. negproduct:=false;
  { NOTE(review): the parameter list is walked from its end, presumably
    because it is stored in reverse order - confirm }
  1086. paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
  1087. paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1088. paraarray[3]:=tcallparanode(parameters).paravalue;
  1089. { check if a neg. node can be removed
  1090. this is possible because changing the sign of
  1091. a floating point number does not affect its absolute
  1092. value in any way
  1093. }
  1094. if paraarray[1].nodetype=unaryminusn then
  1095. begin
  1096. paraarray[1]:=tunarynode(paraarray[1]).left;
  1097. { do not release the unused unary minus node, it is kept and released together with the other nodes,
  1098. only no code is generated for it }
  1099. negproduct:=not(negproduct);
  1100. end;
  1101. if paraarray[2].nodetype=unaryminusn then
  1102. begin
  1103. paraarray[2]:=tunarynode(paraarray[2]).left;
  1104. { do not release the unused unary minus node, it is kept and released together with the other nodes,
  1105. only no code is generated for it }
  1106. negproduct:=not(negproduct);
  1107. end;
  1108. if paraarray[3].nodetype=unaryminusn then
  1109. begin
  1110. paraarray[3]:=tunarynode(paraarray[3]).left;
  1111. { do not release the unused unary minus node, it is kept and released together with the other nodes,
  1112. only no code is generated for it }
  1113. negop3:=true;
  1114. end;
  1115. for i:=1 to 3 do
  1116. secondpass(paraarray[i]);
  1117. { only one memory operand is allowed }
  1118. gotmem:=false;
  1119. memop:=0;
  1120. { in case parameters come on the FPU stack, we have to pop them in reverse order as we
  1121. called secondpass }
  1122. for i:=3 downto 1 do
  1123. begin
  1124. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1125. begin
  { the first reference found (highest index) is kept as the memory
    operand, every other non register operand is loaded into an mm register }
  1126. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1127. begin
  1128. memop:=i;
  1129. gotmem:=true;
  1130. end
  1131. else
  1132. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1133. end;
  1134. end;
  { the result is always placed in a newly allocated mm register }
  1135. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1136. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1137. if gotmem then
  1138. begin
  1139. case memop of
  1140. 1:
  1141. begin
  { result:=para3, then 231 form with para1 in memory and para2 in a register }
  1142. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1143. paraarray[3].location.register,location.register,mms_movescalar);
  1144. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1145. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1146. end;
  1147. 2:
  1148. begin
  { result:=para3, then 231 form with para2 in memory and para1 in a register }
  1149. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1150. paraarray[3].location.register,location.register,mms_movescalar);
  1151. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1152. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1153. end;
  1154. 3:
  1155. begin
  { result:=para1, then 213 form with para3 in memory and para2 in a register }
  1156. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1157. paraarray[1].location.register,location.register,mms_movescalar);
  1158. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1159. paraarray[3].location.reference,paraarray[2].location.register,location.register);
  1160. end
  1161. else
  1162. internalerror(2014041301);
  1163. end;
  1164. end
  1165. else
  1166. begin
  1167. { try to use the location which is already in a temp. mm register as destination,
  1168. so the compiler might be able to re-use the register }
  1169. if paraarray[1].location.loc=LOC_MMREGISTER then
  1170. begin
  { result:=para1, 213 form (table variant 3) with para3 and para2 }
  1171. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1172. paraarray[1].location.register,location.register,mms_movescalar);
  1173. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
  1174. paraarray[3].location.register,paraarray[2].location.register,location.register);
  1175. end
  1176. else if paraarray[2].location.loc=LOC_MMREGISTER then
  1177. begin
  { result:=para2, 213 form (table variant 3) with para3 and para1 }
  1178. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1179. paraarray[2].location.register,location.register,mms_movescalar);
  1180. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
  1181. paraarray[3].location.register,paraarray[1].location.register,location.register);
  1182. end
  1183. else
  1184. begin
  { result:=para3, 231 form (table variant 0) with para1 and para2 }
  1185. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1186. paraarray[3].location.register,location.register,mms_movescalar);
  1187. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
  1188. paraarray[1].location.register,paraarray[2].location.register,location.register);
  1189. end;
  1190. end;
  1191. end
  1192. else
  1193. {$endif i8086}
  1194. internalerror(2014032301);
  1195. end;
  { Generates code for frac() of a float handled by the vector FPU.
    The operand is rounded with the (V)ROUNDSS/SD immediate 3 and the
    rounded value is subtracted from the operand; with AVX512DQ a single
    VREDUCESS/SD with immediate 3 is emitted instead. If operand and result
    float types differ, the result register is converted at the end.
    Raises internalerror(2017052101) if the result type is not handled by
    the vector FPU. }
  1196. procedure tx86inlinenode.second_frac_real;
  1197. var
  1198. extrareg : TRegister;
  1199. begin
  1200. if use_vectorfpu(resultdef) then
  1201. begin
  1202. secondpass(left);
  1203. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1204. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  1205. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1206. if UseAVX then
  1207. case tfloatdef(left.resultdef).floattype of
  1208. s32real:
  1209. begin
  1210. {$ifndef i8086}
  1211. if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
  1212. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,left.location.register,left.location.register,location.register))
  1213. else
  1214. {$endif not i8086}
  1215. begin
  1216. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1217. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
  { result:=operand-rounded operand }
  1218. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
  1219. end;
  1220. end;
  1221. s64real:
  1222. begin
  1223. {$ifndef i8086}
  1224. if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
  1225. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,left.location.register,left.location.register,location.register))
  1226. else
  1227. {$endif not i8086}
  1228. begin
  1229. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1230. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
  { result:=operand-rounded operand }
  1231. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
  1232. end;
  1233. end;
  1234. else
  1235. internalerror(2017052102);
  1236. end
  1237. else
  1238. begin
  { without AVX: the result register receives a copy of the operand, the
    rounded operand is built in extrareg and subtracted from the copy }
  1239. extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1240. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  1241. case tfloatdef(left.resultdef).floattype of
  1242. s32real:
  1243. begin
  1244. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,extrareg));
  1245. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
  1246. end;
  1247. s64real:
  1248. begin
  1249. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,extrareg));
  1250. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
  1251. end;
  1252. else
  1253. internalerror(2017052103);
  1254. end;
  1255. end;
  { the instructions above were selected by the type of the operand;
    convert in place if the result has a different float type }
  1256. if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
  1257. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
  1258. end
  1259. else
  1260. internalerror(2017052101);
  1261. end;
  { Generates code for int() of a float handled by the vector FPU: the
    operand is loaded into an mm register and rounded with (V)ROUNDSS/SD and
    the immediate 3 into a newly allocated result register.
    Raises internalerror(2017052107) if the result type is not handled by
    the vector FPU, internalerror(2017052105) (AVX) or
    internalerror(2017052106) (no AVX) for float types other than
    single and double. }
  procedure tx86inlinenode.second_int_real;
    begin
      if not(use_vectorfpu(resultdef)) then
        internalerror(2017052107);

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            if UseAVX then
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,location.register));
          end;
        s64real:
          begin
            if UseAVX then
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,location.register));
          end;
        else
          begin
            { the error number tells which instruction set was selected }
            if UseAVX then
              internalerror(2017052105)
            else
              internalerror(2017052106);
          end;
      end;
    end;
  { Generates code for high() of a dynamic array without a compare against nil:
    the array pointer is decremented by 1 with SUB, which leaves -1 in the
    register and sets the carry flag if the pointer was nil. In this case the
    load is skipped and -1 is the result, otherwise the result is loaded
    from the header in front of the array data.
    Raises Internalerror(2019122809) if left is not a dynamic array. }
  1296. procedure tx86inlinenode.second_high;
  1297. var
  1298. donelab: tasmlabel;
  1299. hregister : tregister;
  1300. href : treference;
  1301. begin
  1302. secondpass(left);
  1303. if not(is_dynamic_array(left.resultdef)) then
  1304. Internalerror(2019122809);
  { NOTE(review): the value loaded below is returned unchanged as the
    result of high - confirm what the header field at this offset holds }
  1305. { length in dynamic arrays is at offset -sizeof(pint) }
  1306. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
  1307. current_asmdata.getjumplabel(donelab);
  1308. { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
  1309. flag is set in this case, so we can jump depending on it
  1310. when loading the actual high value, we have to take care later of the decreased value
  1311. do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
  1312. we are using is not working as dec does not touch the carry flag }
  1313. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
  1314. { volatility of the dyn. array refers to the volatility of the
  1315. array pointer, not of the array data }
  1316. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,donelab);
  { the +1 in the offset compensates the subtraction done above }
  1317. hlcg.reference_reset_base(href,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
  1318. { if the array pointer is nil, the jump above skips the load -> reuse the register
  1319. that originally held the array pointer for the result, so that the -1
  1320. produced by the subtraction is returned in that case }
  1321. hregister:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
  1322. hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,href,hregister);
  1323. cg.a_label(current_asmdata.CurrAsmList,donelab);
  1324. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  1325. location.register:=hregister;
  1326. end;
  { Generates code for the min/max intrinsics on single and double values
    with MINSS/MINSD/MAXSS/MAXSD or their AVX forms. At most one operand is
    used directly from memory, the result is placed in a newly allocated
    mm register.
    Raises internalerror(2020120503) if the result type is neither single
    nor double (on i386: if the selected FPU does not provide SSE for single
    or SSE2 for double), and always on i8086. }
  1327. procedure tx86inlinenode.second_minmax;
  1328. {$ifndef i8086}
  1329. const
  { opcode table, indexed by [is a max intrinsic, UseAVX, float type] }
  1330. oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
  1331. (
  1332. (
  1333. (A_MINSS,A_MINSD),
  1334. (A_VMINSS,A_VMINSD)
  1335. ),
  1336. (
  1337. (A_MAXSS,A_MAXSD),
  1338. (A_VMAXSS,A_VMAXSD)
  1339. )
  1340. );
  1341. var
  1342. paraarray : array[1..2] of tnode;
  1343. memop,
  1344. i : integer;
  1345. gotmem : boolean;
  1346. op: TAsmOp;
  1347. {$endif i8086}
  1348. begin
  1349. {$ifndef i8086}
  1350. if
  1351. {$ifdef i386}
  1352. ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
  1353. ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
  1354. {$else i386}
  1355. is_single(resultdef) or is_double(resultdef)
  1356. {$endif i386}
  1357. then
  1358. begin
  { NOTE(review): the parameter list is walked from its end, presumably
    because it is stored in reverse order - confirm }
  1359. paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1360. paraarray[2]:=tcallparanode(parameters).paravalue;
  1361. for i:=low(paraarray) to high(paraarray) do
  1362. secondpass(paraarray[i]);
  1363. { only one memory operand is allowed }
  1364. gotmem:=false;
  1365. memop:=0;
  1366. for i:=low(paraarray) to high(paraarray) do
  1367. begin
  1368. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1369. begin
  { the first reference found is kept as the memory operand, every other
    non register operand is loaded into an mm register }
  1370. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1371. begin
  1372. memop:=i;
  1373. gotmem:=true;
  1374. end
  1375. else
  1376. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1377. end;
  1378. end;
  1379. { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
  1380. one of the operands is a NaN, we cannot swap operands to omit a move operation in case fastmath is off }
  1381. if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
  1382. begin
  1383. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
  1384. gotmem:=false;
  1385. end;
  1386. op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
  1387. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1388. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1389. if gotmem then
  1390. begin
  { memop=1 is only reached here with fastmath enabled, see above }
  1391. if UseAVX then
  1392. case memop of
  1393. 1:
  1394. emit_ref_reg_reg(op,S_NO,
  1395. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1396. 2:
  1397. emit_ref_reg_reg(op,S_NO,
  1398. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1399. else
  1400. internalerror(2020120504);
  1401. end
  1402. else
  { two operand form: the register operand is copied into the result
    register first, then combined with the memory operand }
  1403. case memop of
  1404. 1:
  1405. begin
  1406. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1407. paraarray[2].location.register,location.register,mms_movescalar);
  1408. emit_ref_reg(op,S_NO,
  1409. paraarray[1].location.reference,location.register);
  1410. end;
  1411. 2:
  1412. begin
  1413. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1414. paraarray[1].location.register,location.register,mms_movescalar);
  1415. emit_ref_reg(op,S_NO,
  1416. paraarray[2].location.reference,location.register);
  1417. end;
  1418. else
  1419. internalerror(2020120601);
  1420. end;
  1421. end
  1422. else
  1423. begin
  { both operands are in registers }
  1424. if UseAVX then
  1425. emit_reg_reg_reg(op,S_NO,
  1426. paraarray[2].location.register,paraarray[1].location.register,location.register)
  1427. else
  1428. begin
  { result:=first operand, then combine with the second operand }
  1429. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1430. paraarray[1].location.register,location.register,mms_movescalar);
  1431. emit_reg_reg(op,S_NO,
  1432. paraarray[2].location.register,location.register)
  1433. end;
  1434. end;
  1435. end
  1436. else
  1437. {$endif i8086}
  1438. internalerror(2020120503);
  1439. end;
  1440. end.