nx86inl.pas 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 flavour of the code generator inline node: overrides the type check,
    first pass and second pass handlers of tcginlinenode for the intrinsics
    listed below. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { hook called by the round/trunc/frac first pass handlers; the
      implementation in this unit is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    { type checking of the cpu specific intrinsics }
    function pass_typecheck_cpu:tnode;override;

    { first pass overrides; they are needed so that the code generator
      will actually generate these nodes }
    function first_cpu: tnode;override;
    function first_pi: tnode ; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real : tnode; override;
    function first_int_real : tnode; override;
    function first_minmax: tnode; override;

    { node simplification, see the implementation for the handled case }
    function simplify(forinline : boolean) : tnode; override;

    { second pass overrides which emit the code for these nodes }
    procedure pass_generate_code_cpu;override;
    procedure second_IncludeExclude;override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch;override;
    procedure second_abs_long;override;
    procedure second_popcnt;override;
    procedure second_fma;override;
    procedure second_frac_real;override;
    procedure second_int_real;override;
    procedure second_high;override;
    procedure second_minmax;override;
  private
    { evaluates lnode and brings its value into the fpu result register }
    procedure load_fpu_location(lnode: tnode);
  end;
  73. implementation
  74. uses
  75. systems,
  76. globtype,globals,
  77. verbose,compinnr,fmodule,
  78. defutil,
  79. aasmbase,aasmdata,aasmcpu,
  80. symconst,symtype,symdef,symcpu,
  81. ncnv,
  82. htypechk,
  83. cgbase,pass_1,pass_2,
  84. cpuinfo,cpubase,nutils,
  85. ncal,ncgutil,nld,ncon,
  86. tgobj,
  87. cga,cgutils,cgx86,cgobj,hlcgobj;
  88. {*****************************************************************************
  89. TX86INLINENODE
  90. *****************************************************************************}
  { Hook called by first_round_real, first_trunc_real and first_frac_real
    before they decide on the expected location.  This base implementation
    is intentionally empty. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { only makes a difference for x86_64 }
    end;
  { Type check of the x86 specific intrinsics.  For the intrinsics handled
    here the parameter count is verified (where parameters exist) and
    resultdef is set; the function then returns nil.  Additional case
    labels come from the generated include file x86mmtype.inc; anything
    else is forwarded to the inherited implementation. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads: one parameter, result type depends on the access width }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;

        { port writes: two parameters, no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;

        { interrupt flag intrinsics: no result }
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;

        { segment register reads: 16 bit result on i8086, 32 bit otherwise }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}

        { include automatically generated code }
{$i x86mmtype.inc}

        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass of the x86 specific intrinsics: only the expected location
    is recorded.  Value producing intrinsics expect LOC_REGISTER, the
    others LOC_VOID.  Further case labels are supplied by the generated
    include file x86mmfirst.inc; unknown intrinsics go to the inherited
    implementation.  Returns nil for everything handled here. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads and segment register reads deliver a value }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;

        { port writes and cli/sti have no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;

        { include automatically generated code }
{$i x86mmfirst.inc}

        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for pi.  The node is kept for inline code generation (result
    expected in an fpu register, nil returned) only when the best real type
    is the 80 bit extended type; otherwise the inherited handler decides. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_pi;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for arctan.  Handled inline (fpu register result, nil
    returned) when the best real type is 80 bit extended; in all other
    cases, and on i8086 for cpus older than the 386, the inherited handler
    is used. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited first_arctan_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_arctan_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for abs on reals: always handled inline (returns nil).  The
    result is expected in an mm register when the vector fpu is used for
    the result type and in an fpu register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqr on reals: always handled inline (returns nil).  The
    expected location is an mm register if the vector fpu is used for the
    result type, an fpu register if not. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqrt on reals: always handled inline (returns nil).  The
    expected location follows use_vectorfpu(resultdef): mm register when it
    is true, fpu register when it is false. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for ln.  Kept for inline code generation (fpu register
    result, nil returned) only when the best real type is 80 bit extended;
    otherwise the inherited handler is used. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_ln_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for cos.  Handled inline (fpu register result, nil returned)
    when the best real type is 80 bit extended; otherwise, and on i8086
    for cpus older than the 386, the inherited handler is used. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited first_cos_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_cos_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for sin.  Handled inline (fpu register result, nil returned)
    when the best real type is 80 bit extended; otherwise, and on i8086
    for cpus older than the 386, the inherited handler is used. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited first_sin_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_sin_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for round: always handled inline (returns nil).  After the
    type conversion hook has run, the result is expected in an integer
    register on x86_64 when the vector fpu handles the operand type, and
    in memory (LOC_REFERENCE) in every other case. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      maybe_remove_round_trunc_typeconv;
      Result:=nil;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          expectloc:=LOC_REGISTER;
          exit;
        end;
{$endif x86_64}
      expectloc:=LOC_REFERENCE;
    end;
  { First pass for trunc.  When optimizing for size (and, on x86_64, only
    if the vector fpu is not used for the operand) the inherited handler is
    taken.  Otherwise the node is handled inline: nil is returned and the
    result is expected in an integer register (x86_64 with vector fpu) or
    in memory. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      maybe_remove_round_trunc_typeconv;
      if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
         and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
         then
        begin
          Result:=inherited first_trunc_real;
          exit;
        end;

      Result:=nil;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          expectloc:=LOC_REGISTER;
          exit;
        end;
{$endif x86_64}
      expectloc:=LOC_REFERENCE;
    end;
  { First pass for popcnt.  Except on i8086, the node is handled inline
    (register result, nil returned) when the selected cpu type has the
    POPCNT capability and, on i386, the operand is not a 64 bit type.
    In every other case the inherited handler is used. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
         then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { First pass for fma.  Except on i8086, the node is handled inline (mm
    register result, nil returned) when the selected cpu type offers FMA or
    FMA4 and the result type is single or double.  Otherwise the inherited
    handler is used. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif i8086}
      Result:=inherited first_fma;
    end;
  { First pass for frac.  Handled inline (mm register result, nil returned)
    when the fpu type is at least SSE4.1 and the result type is single or
    double; the type conversion hook is run in that case only.  Otherwise
    the inherited handler is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for int.  Handled inline (mm register result, nil returned)
    when the fpu type is at least SSE4.1 and the result type is single or
    double; otherwise the inherited handler is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for min/max.  Never inline on i8086.  On i386 the node is
    handled inline for single with an fpu type of at least SSE and for
    double with at least SSE2; on the remaining targets for any single or
    double result.  Inline handling means mm register result and nil
    returned; everything else goes to the inherited handler. }
  function tx86inlinenode.first_minmax: tnode;
    begin
{$ifndef i8086}
      if
{$ifdef i386}
         ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
         ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
         ((is_double(resultdef)) or (is_single(resultdef)))
{$endif i386}
         then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif i8086}
      Result:=inherited first_minmax;
    end;
  { Simplification hook.  Handles one special case: an int() node whose
    operand is an implicit (not nf_explicit) type conversion of a single or
    double value, with an fpu type of at least SSE4.1.  The conversion node
    is dropped and a type checked copy of this node operating directly on
    the single/double operand is returned.  All other nodes are passed to
    the inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      convarg : tnode;
    begin
      { cheap checks first; the order matches the evaluation order needed
        before left may be treated as a type conversion node }
      if (current_settings.fputype<fpu_sse41) or
         (inlinenumber<>in_int_real) or
         (left.nodetype<>typeconvn) or
         (nf_explicit in left.flags) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      convarg:=ttypeconvnode(left).left;
      if (convarg.resultdef.typ<>floatdef) or
         not(is_double(convarg.resultdef) or is_single(convarg.resultdef)) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { get rid of the type conversion: detach its operand so that freeing
        the conversion node does not free the operand as well }
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=convarg;
      Result:=getcopy;
      tinlinenode(Result).resultdef:=convarg.resultdef;
      typecheckpass(Result);
    end;
  { Second pass of the x86 specific intrinsics: port input/output, CLI/STI,
    reading of segment registers and the cases supplied by the include
    file x86mmsecond.inc.  All other intrinsics are delegated to the
    inherited implementation.

    The locals paraarray, i and op as well as the nested helpers
    GetConstInt, GetParameters, location_force_mmxreg and location_make_ref
    are not referenced by the code visible in this procedure; presumably
    the generated include file uses them, so their names and signatures
    are left untouched. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN of size dsize into the cpu register dreg.  The port is the
      node's only parameter: a constant in 0..255 is encoded as immediate,
      anything else is loaded into DX first.  The value read is then moved
      (as dtype) into a newly allocated integer register which becomes the
      location of the node. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnumber: tnode;
        immediate_port: boolean;
      begin
        portnumber:=left;
        secondpass(portnumber);
        immediate_port:=(portnumber.location.loc=LOC_CONSTANT) and
                        (portnumber.location.value>=0) and
                        (portnumber.location.value<=255);
        if immediate_port then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnumber.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { identical for both variants: copy dreg to the result register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Emits an OUT of size dsize.  The data parameter is evaluated before the
      port parameter and is loaded (as dtype) into dreg.  A constant port in
      0..255 is encoded as immediate, any other port goes through DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnumber, portdata: tnode;
        immediate_port: boolean;
      begin
        portnumber:=tcallparanode(tcallparanode(left).right).left;
        portdata:=tcallparanode(left).left;
        secondpass(portdata);
        secondpass(portnumber);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portdata.resultdef,dtype,portdata.location,dreg);
        immediate_port:=(portnumber.location.loc=LOC_CONSTANT) and
                        (portnumber.location.value>=0) and
                        (portnumber.location.value<=255);
        if not immediate_port then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnumber.location.value));
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Moves the segment register segreg into a new integer register of the
      result type, which becomes the location of the node. }
    procedure get_segreg(segreg:tregister);
      begin
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
      end;

    { Returns the value of the integer constant node n.  If n is not an
      integer constant, the "constant expression expected" message is
      issued and 0 is returned. }
    function GetConstInt(n: tnode): longint;
      begin
        if not is_constintnode(n) then
          begin
            Result:=0;
            Message(type_e_constant_expr_expected);
            exit;
          end;
        Result:=tordconstnode(n).value.svalue;
      end;

    { Stores the first count parameters of the node in paraarray.  A single
      parameter that is not wrapped in a call parameter node is stored
      directly; otherwise the parameter list is walked via nextpara, filling
      the array from index count down to 1. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        if (count=1) and
           (not (left is tcallparanode)) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        for idx:=count downto 1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
          end;
      end;

    { Makes sure l lives in an mmx register.  Nothing is done if l already is
      LOC_MMXREGISTER, or LOC_CMMXREGISTER while maybeconst is set.  Otherwise
      the value is loaded into a new mmx register, temps of the old location
      are released and l is changed to describe the new register. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        newreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        newreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,newreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=newreg;
      end;

    { Turns loc into a memory reference.  References are left alone.  A
      register location becomes a reference using that register as base;
      any other location is first forced into a 32 bit unsigned register
      and then treated the same way. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);

        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);

        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));

        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);

{$i x86mmsecond.inc}

        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Second pass for pi: emits FLDPI, records the additional fpu stack entry
    and reports the fpu result register as the location of the node. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  560. { load the value of lnode into an FPU register }
  { Evaluates lnode and makes its value available in the fpu result
    register.  The node's own location is set up as LOC_FPUREGISTER using
    NR_FPU_RESULT_REG before lnode is processed.  What happens next depends
    on where lnode ended up:
      - fpu register: nothing to do
      - constant fpu register or memory: loaded into the result register
      - mm register: the location is taken over and forced into an fpu
        register
    Any other location kind raises internal error 309991. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { value is already where it is needed }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Second pass for arctan: the operand is brought onto the fpu stack,
    followed by FLD1 and FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { FPATAN takes two operands, the second one is the constant 1.0 }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Second pass for abs on reals.

    x87 path (vector fpu not used for the result type): operand is loaded
    onto the fpu stack and FABS is emitted.

    Vector path: the operand is forced into an mm register and combined by
    a bitwise AND with the external symbol FPC_ABSMASK_SINGLE respectively
    FPC_ABSMASK_DOUBLE (presumably a mask that clears the sign bit; its
    definition is not part of this unit).  With AVX the three operand form
    writes to a fresh result register, without AVX the operand register is
    modified in place and reused as result.  Float types other than single
    and double raise internal error 200506081. }
  procedure tx86inlinenode.second_abs_real;

    { true if the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    var
      href : treference;
      sym : tasmsymbol;
      andop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        location:=left.location;

      { pick mask symbol and instruction according to the float type }
      sym:=nil;
      andop:=A_NONE;
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE',AT_DATA,needs_indirect);
            if UseAVX then
              andop:=A_VANDPS
            else
              andop:=A_ANDPS;
          end;
        s64real:
          begin
            sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE',AT_DATA,needs_indirect);
            if UseAVX then
              andop:=A_VANDPD
            else
              andop:=A_ANDPD;
          end;
        else
          internalerror(200506081);
      end;

      { the remaining steps are the same for both float types }
      reference_reset_symbol(href,sym,0,4,[]);
      current_module.add_extern_asmsym(sym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          andop,S_XMM,href,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(andop,S_XMM,href,location.register));
    end;
  { Second pass for round.

    x86_64 with the vector fpu handling the operand type: the operand is
    forced into an mm register and converted into a new 64 bit integer
    register with (V)CVTSS2SI or (V)CVTSD2SI, depending on operand size and
    AVX usage.  Unexpected operand sizes raise internal error 2007031402
    (AVX) or 2007031404 (no AVX).

    Otherwise: the operand is loaded onto the fpu stack and stored with
    FISTP into a temp which becomes the location of the node, followed by
    FWAIT. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvtop : TAsmOp;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { choose the conversion instruction, then emit it once }
          cvtop:=A_NONE;
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  cvtop:=A_VCVTSS2SI
                else
                  cvtop:=A_CVTSS2SI;
              end;
            OS_F64:
              begin
                if UseAVX then
                  cvtop:=A_VCVTSD2SI
                else
                  cvtop:=A_CVTSD2SI;
              end;
            else
              begin
                if UseAVX then
                  internalerror(2007031402)
                else
                  internalerror(2007031404);
              end;
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}
      { x87: store the rounded value as 64 bit integer into a temp }
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { Second pass for trunc.  Three strategies, tried in this order:

    1. x86_64 only, vector fpu handles the operand type (and the operand is
       not an fpu register value while the fpu type is at least SSE3):
       convert with (V)CVTTSS2SI / (V)CVTTSD2SI into a new 64 bit integer
       register.  Unexpected operand sizes raise internal error 2007031401
       (AVX) or 2007031403 (no AVX).
    2. fpu type at least SSE3: load the operand onto the fpu stack and
       store it with FISTTP into a temp.
    3. otherwise: save the fpu control word twice, OR $0f00 into one copy
       (bits 8..11; on the x87 these are the precision and rounding
       control fields, so rounding becomes truncation), activate it, store
       with FISTP into a temp and restore the original control word. }
  procedure tx86inlinenode.second_trunc_real;
    var
      oldcw,newcw : treference;
{$ifdef x86_64}
      cvtop : TAsmOp;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { NOTE(review): left.location is inspected here before secondpass(left)
        has been called in this routine - confirm that this is intended }
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { choose the truncating conversion instruction, then emit it once }
          cvtop:=A_NONE;
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  cvtop:=A_VCVTTSS2SI
                else
                  cvtop:=A_CVTTSS2SI;
              end;
            OS_F64:
              begin
                if UseAVX then
                  cvtop:=A_VCVTTSD2SI
                else
                  cvtop:=A_CVTTSD2SI;
              end;
            else
              begin
                if UseAVX then
                  internalerror(2007031401)
                else
                  internalerror(2007031403);
              end;
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}

      if (current_settings.fputype>=fpu_sse3) then
        begin
          { FISTTP truncates without touching the control word }
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          exit;
        end;

      { classic sequence: temporarily switch the control word }
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
{$ifdef i8086}
      if current_settings.cputype<=cpu_286 then
        begin
          emit_ref(A_FSTCW,S_NO,newcw);
          emit_ref(A_FSTCW,S_NO,oldcw);
          emit_none(A_FWAIT,S_NO);
        end
      else
{$endif i8086}
        begin
          emit_ref(A_FNSTCW,S_NO,newcw);
          emit_ref(A_FNSTCW,S_NO,oldcw);
        end;
      emit_const_ref(A_OR,S_W,$0f00,newcw);
      load_fpu_location(left);
      emit_ref(A_FLDCW,S_NO,newcw);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      { back to the caller's control word; the temps are no longer needed }
      emit_ref(A_FLDCW,S_NO,oldcw);
      emit_none(A_FWAIT,S_NO);
      tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
      tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
    end;
  { Second pass for sqr on reals.

    x87 path (vector fpu not used for the result type): the operand is
    loaded onto the fpu stack and multiplied with itself (FMUL st0,st0).

    Vector path: a new mm register of the operand's size receives the
    result.  With AVX the operand is forced into an mm register and the
    three operand multiply is used.  Without AVX the operand is copied to
    the result register (after forcing it into an mm register when it sits
    in an fpu register) and the result register is multiplied with
    itself. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { Emits code for sqrt() on a real operand using the scalar (V)SQRTSS/(V)SQRTSD
    instructions when the vector FPU is used, and FSQRT otherwise. }
  procedure tx86inlinenode.second_sqrt_real;
    const
      { scalar square root opcodes, indexed by [AVX encoding, float type] }
      sqrtops : array[false..true,s32real..s64real] of TAsmOp =
        (
          (A_SQRTSS,A_SQRTSD),
          (A_VSQRTSS,A_VSQRTSD)
        );
    begin
      { x87 path }
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          if not(tfloatdef(resultdef).floattype in [s32real,s64real]) then
            internalerror(200510031)
          else
            { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
            { using left.location.register here as 2nd parameter is crucial to break dependency chains }
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrtops[true,tfloatdef(resultdef).floattype],S_NO,
              left.location.register,left.location.register,location.register));
        end
      else
        begin
          if not(tfloatdef(resultdef).floattype in [s32real,s64real]) then
            internalerror(2005100303)
          else
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrtops[false,tfloatdef(resultdef).floattype],S_NO,
              left.location.register,location.register));
        end;
    end;
  { Emits x87 code for ln(): the operand is loaded, FLDLN2 pushes the constant
    ln(2), FXCH swaps the two top stack entries so the operand is back on top,
    and FYL2X combines both into the result. }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      { push ln(2) }
      emit_none(A_FLDLN2,S_NO);
      { bring the operand back to the top of the stack }
      emit_none(A_FXCH,S_NO);
      { ST(1)*log2(ST(0)), leaving one value on the stack }
      emit_none(A_FYL2X,S_NO);
    end;
  { Emits x87 code for cos(): loads the operand and issues FCOS.  On i8086
    targets with a CPU type below cpu_386 the inherited implementation is used
    instead. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Emits x87 code for sin(): loads the operand and issues FSIN.  On i8086
    targets with a CPU type below cpu_386 the inherited implementation is used
    instead. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Emits a PREFETCHNTA for the memory location designated by the operand.
    Nothing is emitted when the operand does not end up in a memory reference;
    on i386/i8086 nothing is emitted either when the selected CPU type is
    below cpu_Pentium3. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node: the switch is removed from
            the operand tree while code is generated for it ... }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { ... and put back afterwards.  The previous code passed false here
            as well, so the switch was never restored. }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { materialise the address in a register and prefetch through
                  a plain base-register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Emits branch-free code for abs() on an integer operand.
    Without CMOV (i8086/i386 only) the sign is smeared over a register with SAR
    and the result is computed as (x xor sign) - sign.
    With CMOV the operand is copied twice, one copy is negated and moved over
    the other one when the negation is not negative. }
  procedure tx86inlinenode.second_abs_long;
    var
      negated : tregister;
      opsize : tcgsize;
      cmov : taicpu;
    begin
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { turn the operand register into the sign mask (all ones if negative, else zero) }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          { result := (x xor mask) - mask }
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          exit;
        end;
{$endif i8086 or i386}
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      negated:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negated);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
      emit_reg(A_NEG,tcgsize2opsize[opsize],negated);
      { take the negated copy when NEG left the sign flag clear }
      cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negated,location.register);
      cmov.condition:=C_NS;
      current_asmdata.CurrAsmList.concat(cmov);
    end;
  927. {*****************************************************************************
  928. INCLUDE/EXCLUDE GENERIC HANDLING
  929. *****************************************************************************}
  { Emits code for include(s,e) / exclude(s,e).
    A constant element is handled with an OR / AND of a bit mask on the set;
    a variable element is handled with BTS / BTR.  On i8086 targets with a CPU
    type below cpu_386 the inherited implementation is used. }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
      setnode,
      elemnode : tnode;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size)
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=(8*tcgsize2size[opsize]);
      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc<>LOC_CONSTANT then
        begin
          { variable element: use a bit test instruction }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;
          if (setnode.location.loc=LOC_REFERENCE) then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if (orgsize in [OS_8,OS_S8]) then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
            end;
        end
      else
        begin
          { constant element: calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the operand-sized unit holding the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,setnode.location.size,mask,setnode.location.register);
            else
              internalerror(200405022);
          end;
        end;
    end;
  1017. procedure tx86inlinenode.second_popcnt;
  1018. var
  1019. opsize: tcgsize;
  1020. begin
  1021. secondpass(left);
  1022. opsize:=tcgsize2unsigned[left.location.size];
  1023. { no 8 Bit popcont }
  1024. if opsize=OS_8 then
  1025. opsize:=OS_16;
  1026. if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) or
  1027. (left.location.size<>opsize) then
  1028. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
  1029. location_reset(location,LOC_REGISTER,opsize);
  1030. location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  1031. if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  1032. emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register)
  1033. else
  1034. emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
  1035. if resultdef.size=1 then
  1036. begin
  1037. location.size:=OS_8;
  1038. location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
  1039. end;
  1040. end;
  1041. procedure tx86inlinenode.second_fma;
  1042. {$ifndef i8086}
  1043. const
  1044. op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
  1045. (
  1046. { positive product }
  1047. (
  1048. { positive third operand }
  1049. ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
  1050. (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
  1051. ),
  1052. { negative third operand }
  1053. ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
  1054. (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
  1055. )
  1056. ),
  1057. { negative product }
  1058. (
  1059. { positive third operand }
  1060. ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
  1061. (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
  1062. ),
  1063. { negative third operand }
  1064. ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
  1065. (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
  1066. )
  1067. )
  1068. );
  1069. var
  1070. paraarray : array[1..3] of tnode;
  1071. memop,
  1072. i : integer;
  1073. negop3,
  1074. negproduct,
  1075. gotmem : boolean;
  1076. {$endif i8086}
  1077. begin
  1078. {$ifndef i8086}
  1079. if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
  1080. begin
  1081. negop3:=false;
  1082. negproduct:=false;
  1083. paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
  1084. paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1085. paraarray[3]:=tcallparanode(parameters).paravalue;
  1086. { check if a neg. node can be removed
  1087. this is possible because changing the sign of
  1088. a floating point number does not affect its absolute
  1089. value in any way
  1090. }
  1091. if paraarray[1].nodetype=unaryminusn then
  1092. begin
  1093. paraarray[1]:=tunarynode(paraarray[1]).left;
  1094. { do not release the unused unary minus node, it is kept and release together with the other nodes,
  1095. only no code is generated for it }
  1096. negproduct:=not(negproduct);
  1097. end;
  1098. if paraarray[2].nodetype=unaryminusn then
  1099. begin
  1100. paraarray[2]:=tunarynode(paraarray[2]).left;
  1101. { do not release the unused unary minus node, it is kept and release together with the other nodes,
  1102. only no code is generated for it }
  1103. negproduct:=not(negproduct);
  1104. end;
  1105. if paraarray[3].nodetype=unaryminusn then
  1106. begin
  1107. paraarray[3]:=tunarynode(paraarray[3]).left;
  1108. { do not release the unused unary minus node, it is kept and release together with the other nodes,
  1109. only no code is generated for it }
  1110. negop3:=true;
  1111. end;
  1112. for i:=1 to 3 do
  1113. secondpass(paraarray[i]);
  1114. { only one memory operand is allowed }
  1115. gotmem:=false;
  1116. memop:=0;
  1117. { in case parameters come on the FPU stack, we have to pop them in reverse order as we
  1118. called secondpass }
  1119. for i:=3 downto 1 do
  1120. begin
  1121. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1122. begin
  1123. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1124. begin
  1125. memop:=i;
  1126. gotmem:=true;
  1127. end
  1128. else
  1129. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1130. end;
  1131. end;
  1132. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1133. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1134. if gotmem then
  1135. begin
  1136. case memop of
  1137. 1:
  1138. begin
  1139. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1140. paraarray[3].location.register,location.register,mms_movescalar);
  1141. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1142. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1143. end;
  1144. 2:
  1145. begin
  1146. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1147. paraarray[3].location.register,location.register,mms_movescalar);
  1148. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1149. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1150. end;
  1151. 3:
  1152. begin
  1153. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1154. paraarray[1].location.register,location.register,mms_movescalar);
  1155. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1156. paraarray[3].location.reference,paraarray[2].location.register,location.register);
  1157. end
  1158. else
  1159. internalerror(2014041301);
  1160. end;
  1161. end
  1162. else
  1163. begin
  1164. { try to use the location which is already in a temp. mm register as destination,
  1165. so the compiler might be able to re-use the register }
  1166. if paraarray[1].location.loc=LOC_MMREGISTER then
  1167. begin
  1168. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1169. paraarray[1].location.register,location.register,mms_movescalar);
  1170. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
  1171. paraarray[3].location.register,paraarray[2].location.register,location.register);
  1172. end
  1173. else if paraarray[2].location.loc=LOC_MMREGISTER then
  1174. begin
  1175. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1176. paraarray[2].location.register,location.register,mms_movescalar);
  1177. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
  1178. paraarray[3].location.register,paraarray[1].location.register,location.register);
  1179. end
  1180. else
  1181. begin
  1182. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1183. paraarray[3].location.register,location.register,mms_movescalar);
  1184. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
  1185. paraarray[1].location.register,paraarray[2].location.register,location.register);
  1186. end;
  1187. end;
  1188. end
  1189. else
  1190. {$endif i8086}
  1191. internalerror(2014032301);
  1192. end;
  1193. procedure tx86inlinenode.second_frac_real;
  1194. var
  1195. extrareg : TRegister;
  1196. begin
  1197. if use_vectorfpu(resultdef) then
  1198. begin
  1199. secondpass(left);
  1200. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1201. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  1202. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1203. if UseAVX then
  1204. case tfloatdef(left.resultdef).floattype of
  1205. s32real:
  1206. begin
  1207. {$ifndef i8086}
  1208. if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
  1209. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,left.location.register,left.location.register,location.register))
  1210. else
  1211. {$endif not i8086}
  1212. begin
  1213. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1214. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
  1215. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
  1216. end;
  1217. end;
  1218. s64real:
  1219. begin
  1220. {$ifndef i8086}
  1221. if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
  1222. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,left.location.register,left.location.register,location.register))
  1223. else
  1224. {$endif not i8086}
  1225. begin
  1226. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1227. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
  1228. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
  1229. end;
  1230. end;
  1231. else
  1232. internalerror(2017052102);
  1233. end
  1234. else
  1235. begin
  1236. extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1237. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  1238. case tfloatdef(left.resultdef).floattype of
  1239. s32real:
  1240. begin
  1241. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,extrareg));
  1242. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
  1243. end;
  1244. s64real:
  1245. begin
  1246. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,extrareg));
  1247. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
  1248. end;
  1249. else
  1250. internalerror(2017052103);
  1251. end;
  1252. end;
  1253. if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
  1254. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
  1255. end
  1256. else
  1257. internalerror(2017052101);
  1258. end;
  1259. procedure tx86inlinenode.second_int_real;
  1260. begin
  1261. if use_vectorfpu(resultdef) then
  1262. begin
  1263. secondpass(left);
  1264. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1265. location_reset(location,LOC_MMREGISTER,left.location.size);
  1266. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1267. if UseAVX then
  1268. case tfloatdef(resultdef).floattype of
  1269. s32real:
  1270. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1271. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
  1272. s64real:
  1273. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1274. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
  1275. else
  1276. internalerror(2017052105);
  1277. end
  1278. else
  1279. begin
  1280. case tfloatdef(resultdef).floattype of
  1281. s32real:
  1282. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,location.register));
  1283. s64real:
  1284. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,location.register));
  1285. else
  1286. internalerror(2017052106);
  1287. end;
  1288. end;
  1289. end
  1290. else
  1291. internalerror(2017052107);
  1292. end;
  1293. procedure tx86inlinenode.second_high;
  1294. var
  1295. donelab: tasmlabel;
  1296. hregister : tregister;
  1297. href : treference;
  1298. begin
  1299. secondpass(left);
  1300. if not(is_dynamic_array(left.resultdef)) then
  1301. Internalerror(2019122809);
  1302. { length in dynamic arrays is at offset -sizeof(pint) }
  1303. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
  1304. current_asmdata.getjumplabel(donelab);
  1305. { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
  1306. flag is set in this case, so we can jump depending on it
  1307. when loading the actual high value, we have to take care later of the decreased value
  1308. do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
  1309. we are using is not working as dec does not touch the carry flag }
  1310. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
  1311. { volatility of the dyn. array refers to the volatility of the
  1312. string pointer, not of the string data }
  1313. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,donelab);
  1314. hlcg.reference_reset_base(href,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
  1315. { if the string pointer is nil, the length is 0 -> reuse the register
  1316. that originally held the string pointer for the length, so that we
  1317. can keep the original nil/0 as length in that case }
  1318. hregister:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
  1319. hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,href,hregister);
  1320. cg.a_label(current_asmdata.CurrAsmList,donelab);
  1321. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  1322. location.register:=hregister;
  1323. end;
  1324. procedure tx86inlinenode.second_minmax;
  1325. {$ifndef i8086}
  1326. const
  1327. oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
  1328. (
  1329. (
  1330. (A_MINSS,A_MINSD),
  1331. (A_VMINSS,A_VMINSD)
  1332. ),
  1333. (
  1334. (A_MAXSS,A_MAXSD),
  1335. (A_VMAXSS,A_VMAXSD)
  1336. )
  1337. );
  1338. var
  1339. paraarray : array[1..2] of tnode;
  1340. memop,
  1341. i : integer;
  1342. gotmem : boolean;
  1343. op: TAsmOp;
  1344. {$endif i8086}
  1345. begin
  1346. {$ifndef i8086}
  1347. if
  1348. {$ifdef i386}
  1349. ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
  1350. ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
  1351. {$else i386}
  1352. is_single(resultdef) or is_double(resultdef)
  1353. {$endif i386}
  1354. then
  1355. begin
  1356. paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1357. paraarray[2]:=tcallparanode(parameters).paravalue;
  1358. for i:=low(paraarray) to high(paraarray) do
  1359. secondpass(paraarray[i]);
  1360. { only one memory operand is allowed }
  1361. gotmem:=false;
  1362. memop:=0;
  1363. for i:=low(paraarray) to high(paraarray) do
  1364. begin
  1365. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1366. begin
  1367. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1368. begin
  1369. memop:=i;
  1370. gotmem:=true;
  1371. end
  1372. else
  1373. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1374. end;
  1375. end;
  1376. { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
  1377. one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
  1378. if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
  1379. begin
  1380. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
  1381. gotmem:=false;
  1382. end;
  1383. op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
  1384. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1385. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1386. if gotmem then
  1387. begin
  1388. if UseAVX then
  1389. case memop of
  1390. 1:
  1391. emit_ref_reg_reg(op,S_NO,
  1392. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1393. 2:
  1394. emit_ref_reg_reg(op,S_NO,
  1395. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1396. else
  1397. internalerror(2020120504);
  1398. end
  1399. else
  1400. case memop of
  1401. 1:
  1402. begin
  1403. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1404. paraarray[2].location.register,location.register,mms_movescalar);
  1405. emit_ref_reg(op,S_NO,
  1406. paraarray[1].location.reference,location.register);
  1407. end;
  1408. 2:
  1409. begin
  1410. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1411. paraarray[1].location.register,location.register,mms_movescalar);
  1412. emit_ref_reg(op,S_NO,
  1413. paraarray[2].location.reference,location.register);
  1414. end;
  1415. else
  1416. internalerror(2020120601);
  1417. end;
  1418. end
  1419. else
  1420. begin
  1421. if UseAVX then
  1422. emit_reg_reg_reg(op,S_NO,
  1423. paraarray[2].location.register,paraarray[1].location.register,location.register)
  1424. else
  1425. begin
  1426. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1427. paraarray[1].location.register,location.register,mms_movescalar);
  1428. emit_reg_reg(op,S_NO,
  1429. paraarray[2].location.register,location.register)
  1430. end;
  1431. end;
  1432. end
  1433. else
  1434. {$endif i8086}
  1435. internalerror(2020120503);
  1436. end;
  1437. end.