nx86inl.pas 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 inline (intrinsic) node: derives from tcginlinenode and overrides its
    typecheck, first-pass and second-pass hooks for the intrinsics that this
    unit implements directly. }
  tx86inlinenode = class(tcginlinenode)
  protected
  { called from first_round_real, first_trunc_real and first_frac_real;
    virtual so that a descendant can override it }
  procedure maybe_remove_round_trunc_typeconv; virtual;
  public
  { typecheck of the port I/O and cli/sti intrinsics }
  function pass_typecheck_cpu:tnode;override;
  { first pass override
  so that the code generator will actually generate
  these nodes.
  }
  function first_cpu: tnode;override;
  function first_pi: tnode ; override;
  function first_arctan_real: tnode; override;
  function first_abs_real: tnode; override;
  function first_sqr_real: tnode; override;
  function first_sqrt_real: tnode; override;
  function first_ln_real: tnode; override;
  function first_cos_real: tnode; override;
  function first_sin_real: tnode; override;
  function first_round_real: tnode; override;
  function first_trunc_real: tnode; override;
  function first_popcnt: tnode; override;
  function first_fma: tnode; override;
  function first_frac_real : tnode; override;
  function first_int_real : tnode; override;
  { strips an implicit type conversion in front of int() when SSE4.1 is available }
  function simplify(forinline : boolean) : tnode; override;
  { second pass override to generate these nodes }
  procedure pass_generate_code_cpu;override;
  procedure second_IncludeExclude;override;
  procedure second_pi; override;
  procedure second_arctan_real; override;
  procedure second_abs_real; override;
  procedure second_round_real; override;
  procedure second_sqr_real; override;
  procedure second_sqrt_real; override;
  procedure second_ln_real; override;
  procedure second_cos_real; override;
  procedure second_sin_real; override;
  procedure second_trunc_real; override;
  procedure second_prefetch;override;
  procedure second_abs_long;override;
  procedure second_popcnt;override;
  procedure second_fma;override;
  procedure second_frac_real;override;
  procedure second_int_real;override;
  private
  { runs the second pass on lnode and brings its value into an FPU register,
    which becomes the location of this node }
  procedure load_fpu_location(lnode: tnode);
  end;
  70. implementation
  71. uses
  72. systems,
  73. globtype,globals,
  74. verbose,compinnr,
  75. defutil,
  76. aasmbase,aasmdata,aasmcpu,
  77. symconst,symtype,symdef,symcpu,
  78. ncnv,
  79. htypechk,
  80. cgbase,pass_1,pass_2,
  81. cpuinfo,cpubase,nutils,
  82. ncal,ncgutil,nld,
  83. tgobj,
  84. cga,cgutils,cgx86,cgobj,hlcgobj;
  85. {*****************************************************************************
  86. TX86INLINENODE
  87. *****************************************************************************}
  { Hook invoked by first_round_real, first_trunc_real and first_frac_real.
    The implementation in this class is intentionally empty; the method is
    virtual so that a descendant can provide real behaviour. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
  begin
  { only makes a difference for x86_64 }
  end;
  { Typecheck of the x86 specific intrinsics: validates the parameter count of
    the port I/O routines and assigns the result type. Any other inline number
    is forwarded to the inherited implementation. Returns nil for the
    intrinsics handled here. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      pass_typecheck_cpu:=nil;
      case inlinenumber of
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            { port number and data }
            CheckParameters(2);
            resultdef:=voidtype;
          end;
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl:
          begin
            { port number only; the result type follows the access width }
            CheckParameters(1);
            if inlinenumber=in_x86_inportb then
              resultdef:=u8inttype
            else if inlinenumber=in_x86_inportw then
              resultdef:=u16inttype
            else
              resultdef:=s32inttype;
          end;
        else
          pass_typecheck_cpu:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass of the x86 specific intrinsics: port reads deliver their value
    in a register, port writes and cli/sti produce no value. Everything else is
    left to the inherited implementation. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      first_cpu:=nil;
      case inlinenumber of
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl:
          expectloc:=LOC_REGISTER;
        else
          first_cpu:=inherited first_cpu;
      end;
    end;
  { pi is generated inline (result in an FPU register) only when the best real
    type is the 80 bit extended type; otherwise the inherited handling is used }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { arctan is generated inline (result in an FPU register) when the best real
    type is the 80 bit extended type; in every other case the inherited
    handling is used }
  function tx86inlinenode.first_arctan_real : tnode;
    var
      use_inherited : boolean;
    begin
      use_inherited:=tfloatdef(pbestrealtype^).floattype<>s80real;
  {$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype < cpu_386 then
        use_inherited:=true;
  {$endif i8086}
      if use_inherited then
        Result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          Result:=nil;
        end;
    end;
  { abs() on a real is always generated inline; the expected location depends
    on whether the result type is handled by the vector unit or by the x87 FPU }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { sqr() on a real is always generated inline; the expected location depends
    on whether the result type is handled by the vector unit or by the x87 FPU }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { sqrt() on a real is always generated inline; the expected location depends
    on whether the result type is handled by the vector unit or by the x87 FPU }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { ln is generated inline (result in an FPU register) only when the best real
    type is the 80 bit extended type; otherwise the inherited handling is used }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { cos is generated inline (result in an FPU register) when the best real type
    is the 80 bit extended type; in every other case the inherited handling is
    used }
  function tx86inlinenode.first_cos_real : tnode;
    var
      use_inherited : boolean;
    begin
      use_inherited:=tfloatdef(pbestrealtype^).floattype<>s80real;
  {$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        use_inherited:=true;
  {$endif i8086}
      if use_inherited then
        Result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          Result:=nil;
        end;
    end;
  { sin is generated inline (result in an FPU register) when the best real type
    is the 80 bit extended type; in every other case the inherited handling is
    used }
  function tx86inlinenode.first_sin_real : tnode;
    var
      use_inherited : boolean;
    begin
      use_inherited:=tfloatdef(pbestrealtype^).floattype<>s80real;
  {$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        use_inherited:=true;
  {$endif i8086}
      if use_inherited then
        Result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          Result:=nil;
        end;
    end;
  { round() is always generated inline. The result is expected in memory,
    except on x86_64 when the operand type is handled by the vector unit, in
    which case it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      Result:=nil;
      maybe_remove_round_trunc_typeconv;
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          expectloc:=LOC_REGISTER;
          exit;
        end;
  {$endif x86_64}
      expectloc:=LOC_REFERENCE;
    end;
  { trunc() is generated inline unless optimizing for size, in which case the
    inherited handling is used. On x86_64 an operand handled by the vector unit
    is always generated inline, with the result in an integer register;
    otherwise the inline result is expected in memory. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      maybe_remove_round_trunc_typeconv;
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
  {$endif x86_64}
      if cs_opt_size in current_settings.optimizerswitches then
        Result:=inherited
      else
        begin
          expectloc:=LOC_REFERENCE;
          Result:=nil;
        end;
    end;
  { popcnt is generated inline (result in a register) when the selected CPU
    type has the POPCNT capability; on i386 this excludes 64 bit operands.
    On i8086, and in all remaining cases, the inherited handling is used. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
  {$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
  {$ifdef i386}
         and not is_64bit(left.resultdef)
  {$endif i386}
        then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
  {$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { fma is generated inline (result in an MM register) when the selected CPU
    type has the FMA or FMA4 capability and the result is a single or a double.
    On i8086, and in all remaining cases, the inherited handling is used. }
  function tx86inlinenode.first_fma : tnode;
    begin
  {$ifndef i8086}
      if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
  {$endif not i8086}
      Result:=inherited first_fma;
    end;
  { frac() is generated inline (result in an MM register) when the FPU type is
    at least SSE4.1 and the result is a single or a double; otherwise the
    inherited handling is used }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { int() is generated inline (result in an MM register) when the FPU type is
    at least SSE4.1 and the result is a single or a double; otherwise the
    inherited handling is used }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { With an FPU type of at least SSE4.1, an int() whose operand is an implicit
    type conversion of a single or double value is rewritten so that it
    operates directly on the unconverted operand: the conversion node is freed
    and a re-typechecked copy of this node, carrying the operand's type as its
    result type, is returned. All other nodes go through the inherited
    simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      conv : ttypeconvnode;
      operand : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
             not(nf_explicit in left.flags) and
             (ttypeconvnode(left).left.resultdef.typ=floatdef) and
             ((is_double(ttypeconvnode(left).left.resultdef)) or (is_single(ttypeconvnode(left).left.resultdef)))) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;
      { get rid of the type conversion }
      conv:=ttypeconvnode(left);
      operand:=conv.left;
      { detach the operand first so that freeing the conversion keeps it alive }
      conv.left:=nil;
      conv.free;
      left:=operand;
      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=operand.resultdef;
      typecheckpass(Result);
    end;
  { Second pass of the x86 specific intrinsics: emits IN/OUT for the port
    routines and CLI/STI for the interrupt flag routines. Any other inline
    number is forwarded to the inherited implementation. }
  procedure tx86inlinenode.pass_generate_code_cpu;

    { Port read. dreg is the fixed accumulator register receiving the value,
      dsize the operand size of the IN instruction and dtype the def matching
      dreg. The value is finally copied to a newly allocated integer register
      which becomes the location of the node. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port: tnode;
      begin
        port:=left;
        secondpass(port);
        if (port.location.loc<>LOC_CONSTANT) or
           (port.location.value<0) or
           (port.location.value>255) then
          begin
            { the port number has to be passed in DX }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end
        else
          begin
            { constant port number in 0..255: use the immediate form }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,port.location.value,dreg));
          end;
        { common tail: the result register is allocated before dreg is released }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Port write. dreg is the fixed accumulator register holding the data,
      dsize the operand size of the OUT instruction and dtype the def matching
      dreg. The data parameter is evaluated before the port number. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port, data: tnode;
      begin
        port:=tcallparanode(tcallparanode(left).right).left;
        data:=tcallparanode(left).left;
        secondpass(data);
        secondpass(port);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,data.resultdef,dtype,data.location,dreg);
        if (port.location.loc<>LOC_CONSTANT) or
           (port.location.value<0) or
           (port.location.value>255) then
          begin
            { the port number has to be passed in DX }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end
        else
          { constant port number in 0..255: use the immediate form }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,port.location.value));
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    begin
      case inlinenumber of
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Loads pi with FLDPI; the result lives in the FPU result register and the
    code generator's FPU stack counter is incremented accordingly. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Runs the second pass on lnode and loads its value into an FPU register.
    The location of this node is set up as LOC_FPUREGISTER beforehand; a value
    that is already in an FPU register needs no extra code, and any location
    kind not listed below is an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { already in place };
        LOC_MMREGISTER,LOC_CMMREGISTER:
          begin
            { take over the operand's location and let the code generator
              move it to the FPU }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
          end;
        LOC_REFERENCE,LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,lnode.location.size,
            lnode.location.size,lnode.location.register,location.register);
        else
          internalerror(309991);
      end;
    end;
  { Generates arctan inline on the x87 FPU: the operand is loaded, 1.0 is pushed
    on top of it and FPATAN is executed. FPATAN computes arctan(ST(1)/ST(0))
    and pops the stack, so dividing by the pushed 1.0 yields arctan(operand). }
  procedure tx86inlinenode.second_arctan_real;
  begin
  load_fpu_location(left);
  emit_none(A_FLD1,S_NO);
  emit_none(A_FPATAN,S_NO);
  end;
  { Generates abs() for reals. On the x87 FPU this is a plain FABS; with the
    vector unit the operand is ANDed with a mask stored under the RTL symbol
    FPC_ABSMASK_SINGLE or FPC_ABSMASK_DOUBLE. With AVX the result goes to a new
    register, without AVX the operand's register is reused as the result. }
  procedure tx86inlinenode.second_abs_real;

    { emits the AND of the operand with the mask stored under masksym;
      avx_op is the three operand form, sse_op the two operand form }
    procedure emit_mask_and(const masksym : string; sse_op,avx_op : tasmop);
      var
        href : treference;
      begin
        reference_reset_symbol(href,current_asmdata.RefAsmSymbol(target_info.cprefix+masksym,AT_DATA),0,4,[]);
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
        if UseAVX then
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
            avx_op,S_XMM,href,left.location.register,location.register))
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sse_op,S_XMM,href,location.register));
      end;

    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;
      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        location:=left.location;
      case tfloatdef(resultdef).floattype of
        s32real:
          emit_mask_and('FPC_ABSMASK_SINGLE',A_ANDPS,A_VANDPS);
        s64real:
          emit_mask_and('FPC_ABSMASK_DOUBLE',A_ANDPD,A_VANDPD);
        else
          internalerror(200506081);
      end;
    end;
  { Generates round(). On x86_64 an operand handled by the vector unit is
    converted with (V)CVTSS2SI/(V)CVTSD2SI into a 64 bit integer register.
    Otherwise the operand is loaded into the x87 FPU and stored with FISTP
    into a temp, which becomes the location of the node. }
  procedure tx86inlinenode.second_round_real;
    begin
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          case left.location.size of
            OS_F32:
              if UseAVX then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_NO,left.location.register,location.register))
              else
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_NO,left.location.register,location.register));
            OS_F64:
              if UseAVX then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_NO,left.location.register,location.register))
              else
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_NO,left.location.register,location.register));
            else
              internalerror(2007031402);
          end;
          exit;
        end;
  {$endif x86_64}
      { x87 path: FISTP stores the value as a 64 bit integer and pops the FPU stack }
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { Generates trunc(). Three strategies are used:
    - x86_64 with an operand handled by the vector unit: (V)CVTTSS2SI or
      (V)CVTTSD2SI into a 64 bit integer register;
    - FPU type of at least SSE3: FISTTP into a temp;
    - otherwise: FISTP into a temp while the x87 control word is temporarily
      modified, with the original control word restored afterwards. }
  procedure tx86inlinenode.second_trunc_real;
  var
  { temps holding the original and the modified x87 control word }
  oldcw,newcw : treference;
  begin
  {$ifdef x86_64}
  { NOTE(review): left.location is inspected here before secondpass(left) has
    been called in this procedure - confirm that this is intended }
  if use_vectorfpu(left.resultdef) and
  not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
  begin
  secondpass(left);
  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  location_reset(location,LOC_REGISTER,OS_S64);
  location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
  if UseAVX then
  case left.location.size of
  OS_F32:
  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register));
  OS_F64:
  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register));
  else
  internalerror(2007031401);
  end
  else
  case left.location.size of
  OS_F32:
  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
  OS_F64:
  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
  else
  internalerror(2007031401);
  end;
  end
  else
  {$endif x86_64}
  begin
  if (current_settings.fputype>=fpu_sse3) then
  begin
  { FISTTP truncates regardless of the rounding mode in the control word }
  load_fpu_location(left);
  location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
  tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
  emit_ref(A_FISTTP,S_IQ,location.reference);
  tcgx86(cg).dec_fpu_stack;
  end
  else
  begin
  tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
  tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
  {$ifdef i8086}
  { up to the 286 the waiting FSTCW form followed by an explicit FWAIT is used }
  if current_settings.cputype<=cpu_286 then
  begin
  emit_ref(A_FSTCW,S_NO,newcw);
  emit_ref(A_FSTCW,S_NO,oldcw);
  emit_none(A_FWAIT,S_NO);
  end
  else
  {$endif i8086}
  begin
  emit_ref(A_FNSTCW,S_NO,newcw);
  emit_ref(A_FNSTCW,S_NO,oldcw);
  end;
  { set bits 8..11 in the copy of the control word; bits 10..11 are the
    rounding control field and the value 11b selects rounding towards zero }
  emit_const_ref(A_OR,S_W,$0f00,newcw);
  load_fpu_location(left);
  emit_ref(A_FLDCW,S_NO,newcw);
  location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
  tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
  emit_ref(A_FISTP,S_IQ,location.reference);
  tcgx86(cg).dec_fpu_stack;
  { restore the original control word }
  emit_ref(A_FLDCW,S_NO,oldcw);
  emit_none(A_FWAIT,S_NO);
  tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
  tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
  end;
  end;
  end;
  { Generates sqr() for reals. On the x87 FPU, ST(0) is multiplied by itself.
    With the vector unit the result gets a new MM register: with AVX the three
    operand multiply is used directly on the operand, without AVX the operand
    is first copied into the result register and then multiplied by itself. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;
      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          { a value living on the x87 stack has to be moved to an MM register first }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { Generates sqrt() for reals. On the x87 FPU this is a plain FSQRT. With the
    vector unit the operand is forced into an MM register and
    (V)SQRTSS/(V)SQRTSD writes the result into a new MM register. }
  procedure tx86inlinenode.second_sqrt_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
      { for the AVX forms, using left.location.register as 2nd parameter is crucial to break dependency chains }
      case tfloatdef(resultdef).floattype of
        s32real:
          if UseAVX then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register))
          else
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
        s64real:
          if UseAVX then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register))
          else
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
        else
          internalerror(200510031);
      end;
    end;
  { Generates ln inline on the x87 FPU: the operand is loaded, ln(2) is pushed
    with FLDLN2 and FXCH swaps the two so that the operand is in ST(0) and
    ln(2) in ST(1). FYL2X then computes ST(1)*log2(ST(0)) and pops the stack,
    i.e. ln(2)*log2(operand) = ln(operand). }
  procedure tx86inlinenode.second_ln_real;
  begin
  load_fpu_location(left);
  emit_none(A_FLDLN2,S_NO);
  emit_none(A_FXCH,S_NO);
  emit_none(A_FYL2X,S_NO);
  end;
  { Generates cos inline with FCOS on the operand loaded into the x87 FPU;
    on i8086 targets with a CPU type below the 386 the inherited code
    generation is used instead }
  procedure tx86inlinenode.second_cos_real;
    begin
  {$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited second_cos_real
      else
  {$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Generates sin inline with FSIN on the operand loaded into the x87 FPU;
    on i8086 targets with a CPU type below the 386 the inherited code
    generation is used instead }
  procedure tx86inlinenode.second_sin_real;
    begin
  {$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited second_sin_real
      else
  {$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Generates a PREFETCHNTA for the memory location denoted by the parameter.
    On i386/i8086 nothing is generated for CPU types below the Pentium 3, and
    nothing is generated when the parameter is not a memory reference.

    Pointer checking is switched off in the parameter's node tree while it is
    evaluated and switched back on afterwards. The original code passed "false"
    to the second node_change_local_switch call as well, which made the restore
    a no-op and left the switch disabled in the tree. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
  {$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
  {$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch that was disabled above }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { load the address into a register so that the prefetch uses
                  a plain base register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates abs() for integers without a branch.
    - i8086/i386 without the CMOV capability: the operand is copied to the
      result register, the operand register is turned into its sign mask by an
      arithmetic shift right by (bit width - 1), and the result is XORed with
      and then reduced by that mask.
    - otherwise: a negated copy of the operand is computed and CMOVNS moves it
      over the result when the negation did not set the sign flag. }
  procedure tx86inlinenode.second_abs_long;
    var
      hregister : tregister;
      opsize : tcgsize;
      hp : taicpu;
    begin
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
  {$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { the operand register becomes the sign mask }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          exit;
        end;
  {$endif i8086 or i386}
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      { two copies of the operand: hregister gets negated, location.register keeps the original }
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,hregister);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
      emit_reg(A_NEG,tcgsize2opsize[opsize],hregister);
      hp:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],hregister,location.register);
      hp.condition:=C_NS;
      current_asmdata.CurrAsmList.concat(hp);
    end;
  785. {*****************************************************************************
  786. INCLUDE/EXCLUDE GENERIC HANDLING
  787. *****************************************************************************}
  procedure tx86inlinenode.second_IncludeExclude;
    { Code generation for include(s,e) and exclude(s,e).

      A constant element is folded into an OR (include) or AND-with-inverted-
      mask (exclude) applied to the set, which must be a LOC_REFERENCE or a
      LOC_CREGISTER.  A non-constant element is loaded into a register and the
      set is updated with BTS (include) or BTR (exclude). }
    var
      setnode,
      elemnode : tnode;
      bitreg,
      setreg : tregister;
      base : aint;
      opbits,mask : longint;
      constop : topcg;
      bitop : tasmop;
      regdef : tdef;
      setsize : tcgsize;
      widened : boolean;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { first parameter: the set; second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      { small sets are handled in their own size, everything else in 32 bit units }
      if is_smallset(tcallparanode(left).resultdef) then
        begin
          regdef:=tcallparanode(left).resultdef;
          setsize:=int_cgsize(regdef.size);
        end
      else
        begin
          regdef:=u32inttype;
          setsize:=OS_32;
        end;
      opbits:=8*tcgsize2size[setsize];

      secondpass(setnode);
      secondpass(elemnode);
      base:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { bit mask of the element inside its unit of opbits bits }
          mask:=1 shl ((elemnode.location.value-base) mod opbits);
          { include sets the bit, exclude clears it through the inverted mask }
          if inlinenumber<>in_include_x_y then
            begin
              constop:=OP_AND;
              mask:=not(mask);
            end
          else
            constop:=OP_OR;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the unit that holds the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-base) div opbits)*tcgsize2size[setsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,constop,setsize,mask,
                  setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(current_asmdata.CurrAsmList,constop,setnode.location.size,mask,
                setnode.location.register);
            else
              internalerror(200405022);
          end;
          exit;
        end;

      { element only known at run time: 8 bit sets are processed as 32 bit }
      widened:=setsize in [OS_8,OS_S8];
      if widened then
        begin
          regdef:=u32inttype;
          setsize:=OS_32;
        end;
      { determine asm operator }
      if inlinenumber=in_include_x_y then
        bitop:=A_BTS
      else
        bitop:=A_BTR;

      hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,
        elemnode.resultdef,regdef,true);
      register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,
        elemnode.location,base);
      bitreg:=elemnode.location.register;

      if setnode.location.loc=LOC_REFERENCE then
        emit_reg_ref(bitop,tcgsize2opsize[setsize],bitreg,setnode.location.reference)
      else
        begin
          { second argument can't be an 8 bit register either }
          setreg:=setnode.location.register;
          if widened then
            setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,setsize);
          emit_reg_reg(bitop,tcgsize2opsize[setsize],bitreg,setreg);
        end;
    end;
  procedure tx86inlinenode.second_popcnt;
    { Emits a POPCNT instruction for the operand "left" and returns the count
      in a freshly allocated integer register. }
    var
      cntsize: tcgsize;
    begin
      secondpass(left);

      { work on the unsigned size of the operand; there is no 8 bit popcnt,
        so byte operands are counted as 16 bit values }
      cntsize:=tcgsize2unsigned[left.location.size];
      if cntsize=OS_8 then
        cntsize:=OS_16;

      { the operand is used as is only when it already has the operation size
        and lives in a register or in memory; otherwise load it into a register }
      if (left.location.size<>cntsize) or
         not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);

      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);

      { register source or memory source }
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register);
      end;
    end;
  procedure tx86inlinenode.second_fma;
    { Generates a scalar fused multiply-add for the three call parameters.

      Unary minus nodes directly on the parameters are not evaluated; their
      effect is folded into the choice of opcode.  At most one operand is used
      from memory, the others are forced into mm registers.  Raises internal
      error 2014032301 when the selected CPU has neither FMA nor FMA4 (and
      always on i8086, where the code generation is compiled out). }
    const
      { opcode table indexed by
          [product negated, third operand negated, float type, operand layout];
        layouts 0..2 hold the 231 form, layout 3 the 213 form }
      fmaops : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
          { product not negated }
          (
            { third operand not negated }
            (
              (A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
              (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
            ),
            { third operand negated }
            (
              (A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
              (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
            )
          ),
          { product negated }
          (
            { third operand not negated }
            (
              (A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
              (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
            ),
            { third operand negated }
            (
              (A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
              (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
            )
          )
        );
    var
      args : array[1..3] of tnode;
      para : tcallparanode;
      ftype : tfloattype;
      memidx,
      accidx,
      srcidx1,
      srcidx2,
      layout,
      i : integer;
      negaddend,
      negprod : boolean;
    begin
{$ifndef i8086}
      if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
        begin
          { the head of the parameter chain fills slot 3, the last entry slot 1 }
          para:=tcallparanode(parameters);
          for i:=3 downto 1 do
            begin
              args[i]:=para.paravalue;
              para:=tcallparanode(para.nextpara);
            end;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the skipped unary minus nodes are not released here, they are kept
            and released together with the other nodes, only no code is
            generated for them }
          negprod:=false;
          negaddend:=false;
          { a minus on either factor flips the sign of the product }
          for i:=1 to 2 do
            if args[i].nodetype=unaryminusn then
              begin
                args[i]:=tunarynode(args[i]).left;
                negprod:=not(negprod);
              end;
          if args[3].nodetype=unaryminusn then
            begin
              args[3]:=tunarynode(args[3]).left;
              negaddend:=true;
            end;

          for i:=1 to 3 do
            secondpass(args[i]);

          { only one memory operand is allowed: the first operand found in
            memory is kept there (memidx), all other non-register operands are
            loaded into mm registers }
          memidx:=0;
          for i:=1 to 3 do
            if not(args[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if (memidx=0) and (args[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  memidx:=i
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,args[i].location,args[i].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,args[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          ftype:=tfloatdef(resultdef).floattype;

          if memidx<>0 then
            begin
              { pick the operand copied into the result register (accidx) and
                the remaining register operand (srcidx2); the memory operand
                also selects the opcode layout }
              case memidx of
                1,2:
                  begin
                    accidx:=3;
                    srcidx2:=3-memidx;
                  end;
                3:
                  begin
                    accidx:=1;
                    srcidx2:=2;
                  end;
                else
                  internalerror(2014041301);
              end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[accidx].resultdef,resultdef,
                args[accidx].location.register,location.register,mms_movescalar);
              emit_ref_reg_reg(fmaops[negprod,negaddend,ftype,memidx],S_NO,
                args[memidx].location.reference,args[srcidx2].location.register,location.register);
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if args[1].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=1;
                  layout:=3;
                  srcidx1:=3;
                  srcidx2:=2;
                end
              else if args[2].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=2;
                  layout:=3;
                  srcidx1:=3;
                  srcidx2:=1;
                end
              else
                begin
                  accidx:=3;
                  layout:=0;
                  srcidx1:=1;
                  srcidx2:=2;
                end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[accidx].resultdef,resultdef,
                args[accidx].location.register,location.register,mms_movescalar);
              emit_reg_reg_reg(fmaops[negprod,negaddend,ftype,layout],S_NO,
                args[srcidx1].location.register,args[srcidx2].location.register,location.register);
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  procedure tx86inlinenode.second_frac_real;
    { Generates frac() for single and double operands handled by the vector
      FPU: the operand is rounded with rounding-control immediate 3 and the
      rounded value is subtracted from the operand.

      Internal errors: 2017052101 when the result type is not handled by the
      vector FPU, 2017052102 (AVX) or 2017052103 (non-AVX) for any float type
      other than s32real/s64real. }
    var
      roundedreg : TRegister;
      roundop,
      subop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if UseAVX then
            begin
              { pick the opcode pair matching the operand precision }
              case tfloatdef(resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_VROUNDSS;
                    subop:=A_VSUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_VROUNDSD;
                    subop:=A_VSUBSD;
                  end;
                else
                  internalerror(2017052102);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,
                left.location.register,left.location.register,location.register));
              { subtract the rounded value, which sits in the result register, from the operand }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(subop,S_NO,
                location.register,left.location.register,location.register));
            end
          else
            begin
              { two-operand forms: the result register starts as a copy of the
                operand, the rounded value goes into an extra register }
              roundedreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_ROUNDSS;
                    subop:=A_SUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_ROUNDSD;
                    subop:=A_SUBSD;
                  end;
                else
                  internalerror(2017052103);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,
                left.location.register,roundedreg));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(subop,S_NO,
                roundedreg,location.register));
            end;
        end;
    end;
  procedure tx86inlinenode.second_int_real;
    { Generates int() for single and double operands handled by the vector
      FPU by rounding the operand with rounding-control immediate 3 into a
      new mm register.

      Internal errors: 2017052107 when the result type is not handled by the
      vector FPU, 2017052105 (AVX) or 2017052106 (non-AVX) for any float type
      other than s32real/s64real.

      The formerly declared local "extrareg" was never used and has been
      removed. }
    var
      roundop : tasmop;
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if UseAVX then
            begin
              { pick the opcode matching the operand precision }
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_VROUNDSS;
                s64real:
                  roundop:=A_VROUNDSD;
                else
                  internalerror(2017052105);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,
                left.location.register,left.location.register,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_ROUNDSS;
                s64real:
                  roundop:=A_ROUNDSD;
                else
                  internalerror(2017052106);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,
                left.location.register,location.register));
            end;
        end
      else
        internalerror(2017052107);
    end;
  1128. end.