nx86inl.pas 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  type
    { Inline (intrinsic) node specialisation shared by the x86 targets.
      It overrides the type check, first pass and code generation hooks
      of its ancestor for the intrinsics implemented in this unit. }
    tx86inlinenode = class(tcginlinenode)
    protected
      { hook invoked by the round/trunc/frac first pass handlers; it is
        virtual so that a descendant can supply its own behaviour }
      procedure maybe_remove_round_trunc_typeconv; virtual;
    public
      { type checking of the cpu specific intrinsics }
      function pass_typecheck_cpu:tnode;override;

      { first pass overrides: these decide whether the node is kept so
        that the code generator actually emits code for it }
      function first_cpu: tnode;override;
      function first_pi: tnode ; override;
      function first_arctan_real: tnode; override;
      function first_abs_real: tnode; override;
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_ln_real: tnode; override;
      function first_cos_real: tnode; override;
      function first_sin_real: tnode; override;
      function first_round_real: tnode; override;
      function first_trunc_real: tnode; override;
      function first_popcnt: tnode; override;
      function first_fma: tnode; override;
      function first_frac_real : tnode; override;
      function first_int_real : tnode; override;
      function first_minmax: tnode; override;
      function simplify(forinline : boolean) : tnode; override;

      { second pass overrides: the actual code generation }
      procedure pass_generate_code_cpu;override;
      procedure second_IncludeExclude;override;
      procedure second_pi; override;
      procedure second_arctan_real; override;
      procedure second_abs_real; override;
      procedure second_round_real; override;
      procedure second_sqr_real; override;
      procedure second_sqrt_real; override;
      procedure second_ln_real; override;
      procedure second_cos_real; override;
      procedure second_sin_real; override;
      procedure second_trunc_real; override;
      procedure second_prefetch;override;
      procedure second_abs_long;override;
      procedure second_popcnt;override;
      procedure second_fma;override;
      procedure second_frac_real;override;
      procedure second_int_real;override;
      procedure second_high;override;
      procedure second_minmax;override;
    private
      { evaluates lnode and brings its value into the x87 result register }
      procedure load_fpu_location(lnode: tnode);
    end;
  73. implementation
  74. uses
  75. systems,
  76. globtype,globals,
  77. verbose,compinnr,fmodule,
  78. defutil,
  79. aasmbase,aasmdata,aasmcpu,
  80. symconst,symtype,symdef,symcpu,
  81. ncnv,
  82. htypechk,
  83. cgbase,pass_1,pass_2,
  84. cpuinfo,cpubase,nutils,
  85. ncal,ncgutil,nld,ncon,
  86. tgobj,
  87. cga,cgutils,cgx86,cgobj,hlcgobj;
  88. {*****************************************************************************
  89. TX86INLINENODE
  90. *****************************************************************************}
  { Intentionally empty in this base class: per the original note this
    hook only makes a difference for x86_64. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
    end;
  { Assigns resultdef for the x86 specific intrinsics handled here.
    Returns nil for those; every other inline number is passed on to the
    inherited implementation and its result is returned. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      result:=nil;
      case inlinenumber of
        { port reads: one parameter, result type depends on the intrinsic }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;
        { port writes: two parameters, no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;
        { segment register reads: 16 bit result on i8086, 32 bit elsewhere }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          resultdef:={$ifdef i8086}u16inttype{$else i8086}s32inttype{$endif i8086};
        { include automatically generated code }
{$i x86mmtype.inc}
        else
          result:=inherited pass_typecheck_cpu;
      end;
    end;
  { Sets expectloc for the x86 specific intrinsics handled here and
    returns nil for them; every other inline number is delegated to the
    inherited first_cpu. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      result:=nil;
      case inlinenumber of
        { intrinsics that produce a value }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;
        { intrinsics without a result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;
        { include automatically generated code }
{$i x86mmfirst.inc}
        else
          result:=inherited first_cpu;
      end;
    end;
  { pi is only generated inline (in an fpu register) when the best real
    type is the 80 bit one; otherwise the inherited handling is used. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited first_pi
      else
        begin
          result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { arctan is generated inline only when the best real type is s80real.
    On i8086 the selected cpu must additionally be at least a 386:
    FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
    so the RTL helper (inherited path) is used for these FPUs. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
      if
{$ifdef i8086}
         (current_settings.cputype>=cpu_386) and
{$endif i8086}
         (tfloatdef(pbestrealtype^).floattype=s80real) then
        begin
          result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end
      else
        result:=inherited first_arctan_real;
    end;
  { abs() on reals is always generated inline: the result is expected in
    an mm register when the vector fpu is used for resultdef, otherwise
    in an x87 register. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { sqr() on reals is always generated inline: the result is expected in
    an mm register when the vector fpu is used for resultdef, otherwise
    in an x87 register. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { sqrt() on reals is always generated inline: the result is expected in
    an mm register when the vector fpu is used for resultdef, otherwise
    in an x87 register. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { ln is only generated inline (in an fpu register) when the best real
    type is the 80 bit one; otherwise the inherited handling is used. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited first_ln_real
      else
        begin
          result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { cos is generated inline only when the best real type is s80real.
    On i8086 the selected cpu must additionally be at least a 386,
    because FCOS is 387+; otherwise the inherited handling is used. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
      if
{$ifdef i8086}
         (current_settings.cputype>=cpu_386) and
{$endif i8086}
         (tfloatdef(pbestrealtype^).floattype=s80real) then
        begin
          result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end
      else
        result:=inherited first_cos_real;
    end;
  { sin is generated inline only when the best real type is s80real.
    On i8086 the selected cpu must additionally be at least a 386,
    because FSIN is 387+; otherwise the inherited handling is used. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
      if
{$ifdef i8086}
         (current_settings.cputype>=cpu_386) and
{$endif i8086}
         (tfloatdef(pbestrealtype^).floattype=s80real) then
        begin
          result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end
      else
        result:=inherited first_sin_real;
    end;
  { round() is always generated inline. On x86_64 with a vector fpu
    operand the result is expected in an integer register, in all other
    cases in memory. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      maybe_remove_round_trunc_typeconv;
      result:=nil;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          expectloc:=LOC_REGISTER;
          exit;
        end;
{$endif x86_64}
      expectloc:=LOC_REFERENCE;
    end;
  { trunc(): on x86_64 a vector fpu operand is always handled inline with
    the result in an integer register. Otherwise the inherited handling is
    used when optimizing for size, and the inline version (result in
    memory) in all remaining cases. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      maybe_remove_round_trunc_typeconv;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          expectloc:=LOC_REGISTER;
          result:=nil;
          exit;
        end;
{$endif x86_64}
      if cs_opt_size in current_settings.optimizerswitches then
        result:=inherited first_trunc_real
      else
        begin
          expectloc:=LOC_REFERENCE;
          result:=nil;
        end;
    end;
  { popcnt is generated inline (result in a register) when the selected
    cpu type has CPUX86_HAS_POPCNT; on i386 this additionally requires a
    non 64 bit operand. i8086 always uses the inherited handling. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifdef i8086}
      result:=inherited first_popcnt;
{$else i8086}
      if not(CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         or is_64bit(left.resultdef)
{$endif i386}
        then
        result:=inherited first_popcnt
      else
        begin
          result:=nil;
          expectloc:=LOC_REGISTER;
        end;
{$endif i8086}
    end;
  { fma is generated inline (result in an mm register) for single and
    double results when the selected fpu type offers FMA or FMA4.
    i8086 and all other cases use the inherited handling. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifdef i8086}
      result:=inherited first_fma;
{$else i8086}
      if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          result:=nil;
          expectloc:=LOC_MMREGISTER;
        end
      else
        result:=inherited first_fma;
{$endif i8086}
    end;
  { frac() is generated inline (result in an mm register) for single and
    double results when the fpu type is at least fpu_sse41; otherwise the
    inherited handling is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      result:=nil;
    end;
  { int() is generated inline (result in an mm register) for single and
    double results when the fpu type is at least fpu_sse41; otherwise the
    inherited handling is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          result:=inherited first_int_real;
          exit;
        end;
      expectloc:=LOC_MMREGISTER;
      result:=nil;
    end;
  { min/max is generated inline (result in an mm register) for single and
    double results. On i386 this requires at least fpu_sse for single and
    fpu_sse2 for double; i8086 always uses the inherited handling. }
  function tx86inlinenode.first_minmax: tnode;
    begin
{$ifdef i8086}
      result:=inherited first_minmax;
{$else i8086}
      if
{$ifdef i386}
         ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
         ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
         (is_double(resultdef) or is_single(resultdef))
{$endif i386}
        then
        begin
          result:=nil;
          expectloc:=LOC_MMREGISTER;
        end
      else
        result:=inherited first_minmax;
{$endif i8086}
    end;
  { With an fpu type of at least fpu_sse41, int() applied to a non-explicit
    type conversion of a single/double value is rewritten to work on the
    unconverted operand: the conversion node is dropped and a re-typechecked
    copy of this node, typed like the operand, is returned.
    Everything else goes through the inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      operand : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
             not(nf_explicit in left.flags) and
             (ttypeconvnode(left).left.resultdef.typ=floatdef) and
             (is_double(ttypeconvnode(left).left.resultdef) or
              is_single(ttypeconvnode(left).left.resultdef))) then
        begin
          result:=inherited simplify(forinline);
          exit;
        end;

      { detach the operand from the conversion node before freeing the
        latter, then use the operand directly }
      operand:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=operand;

      result:=self.getcopy;
      tinlinenode(result).resultdef:=operand.resultdef;
      typecheckpass(result);
    end;
  { Generates code for the x86 specific intrinsics (port i/o, cli/sti,
    segment register reads and whatever x86mmsecond.inc contributes).
    Unknown inline numbers are forwarded to the inherited implementation.
    NOTE(review): paraarray, i, op and several nested helpers are not used
    by the code visible here; presumably the included x86mmsecond.inc
    relies on them, so their names and signatures are left untouched. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits IN into dreg (size dsize, type dtype) and copies the value to
      a fresh integer register that becomes the node's location. A constant
      port number in 0..255 is encoded as immediate, anything else is
      passed through DX. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port: tnode;
      begin
        port:=left;
        secondpass(port);
        if (port.location.loc=LOC_CONSTANT) and
           (port.location.value>=0) and
           (port.location.value<=255) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,port.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { common tail: move the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Loads the data parameter into dreg and emits OUT. A constant port
      number in 0..255 is encoded as immediate, anything else is passed
      through DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port, data: tnode;
      begin
        port:=tcallparanode(tcallparanode(left).right).left;
        data:=tcallparanode(left).left;
        secondpass(data);
        secondpass(port);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,data.resultdef,dtype,data.location,dreg);
        if not((port.location.loc=LOC_CONSTANT) and
               (port.location.value>=0) and
               (port.location.value<=255)) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,port.location.value));
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Moves segment register segreg into a fresh integer register, which
      becomes the node's location. }
    procedure get_segreg(segreg:tregister);
      begin
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
      end;

    { Returns the value of an integer constant node; for anything else the
      type_e_constant_expr_expected message is issued and 0 is returned. }
    function GetConstInt(n: tnode): longint;
      begin
        if not is_constintnode(n) then
          begin
            result:=0;
            Message(type_e_constant_expr_expected);
          end
        else
          result:=tordconstnode(n).value.svalue;
      end;

    { Fills paraarray[1..count] from the parameter list in left. The list
      is walked from left via nextpara while the array is filled from
      index count down to 1. A single parameter that is not wrapped in a
      tcallparanode is stored directly. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        if (count=1) and not(left is tcallparanode) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        for idx:=count downto 1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
          end;
      end;

    { Makes sure l lives in an mmx register. A LOC_CMMXREGISTER is left
      alone only when maybeconst is true. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        mmxreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        mmxreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=mmxreg;
      end;

    { Turns loc into a memory reference. References are left unchanged;
      a register is used as base of a new reference; any other location
      is first forced into a 32 bit register. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);
{$i x86mmsecond.inc}
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Emits FLDPI; the result is left in the fpu result register and the
    code generator's fpu stack counter is incremented accordingly. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { load the value of the given node into an fpu register }
  { Evaluates lnode and makes its value available in the fpu result
    register, which is set up as the location of this node.
    Values already in LOC_FPUREGISTER need no extra code; constant fpu
    registers and memory references are loaded; mm registers are converted
    through location_force_fpureg. Any other location is an internal
    error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing to do }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            { take over the operand's location and convert it in place }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { arctan(x): loads the operand onto the fpu stack, pushes 1.0 (FLD1)
    and emits FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { abs() for reals. With the vector fpu the operand is ANDed with the
    FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE constant (three operand form
    into a fresh register with AVX, in place otherwise); without it
    FABS is emitted on the x87 stack. }
  procedure tx86inlinenode.second_abs_real;

    { whether the mask symbol must be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    { Registers masksym as external symbol, builds a reference to it and
      emits the AND with that memory operand: avxop (src = left, dest =
      location) when AVX is used, sseop (in place on location) otherwise. }
    procedure emit_mask_and(masksym: tasmsymbol; sseop, avxop: tasmop);
      var
        maskref : treference;
      begin
        reference_reset_symbol(maskref,masksym,0,4,[]);
        current_module.add_extern_asmsym(masksym);
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
        if UseAVX then
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
            avxop,S_XMM,maskref,left.location.register,location.register))
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(
            sseop,S_XMM,maskref,location.register));
      end;

    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);

      { AVX writes to a new register, SSE modifies the operand in place }
      if UseAVX then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        location:=left.location;

      case tfloatdef(resultdef).floattype of
        s32real:
          emit_mask_and(
            current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE',AT_DATA,needs_indirect),
            A_ANDPS,A_VANDPS);
        s64real:
          emit_mask_and(
            current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE',AT_DATA,needs_indirect),
            A_ANDPD,A_VANDPD);
        else
          internalerror(200506081);
      end;
    end;
  { round(): on x86_64 with a vector fpu operand the value is converted
    with (V)CVTSS2SI / (V)CVTSD2SI into a 64 bit integer register.
    Otherwise the operand is loaded onto the x87 stack and stored with
    FISTP into a temp that becomes the node's location. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    { emits "op left.location.register,location.register" }
    procedure emit_convert(op: tasmop);
      begin
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(op,S_NO,left.location.register,location.register));
      end;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          if UseAVX then
            begin
              case left.location.size of
                OS_F32:
                  emit_convert(A_VCVTSS2SI);
                OS_F64:
                  emit_convert(A_VCVTSD2SI);
                else
                  internalerror(2007031402);
              end;
            end
          else
            begin
              case left.location.size of
                OS_F32:
                  emit_convert(A_CVTSS2SI);
                OS_F64:
                  emit_convert(A_CVTSD2SI);
                else
                  internalerror(2007031404);
              end;
            end;
          exit;
        end;
{$endif x86_64}
      { x87 path }
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { trunc(): three code paths.
    - x86_64 with a vector fpu operand: (V)CVTTSS2SI / (V)CVTTSD2SI into a
      64 bit integer register;
    - fpu type >= fpu_sse3: x87 load followed by FISTTP into a temp;
    - otherwise: x87 load and FISTP into a temp, with the fpu control word
      temporarily replaced by a copy that has $0f00 ORed in. }
  procedure tx86inlinenode.second_trunc_real;
    var
      oldcw,newcw : treference;
{$ifdef x86_64}
    { emits "op left.location.register,location.register" }
    procedure emit_convert(op: tasmop);
      begin
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(op,S_NO,left.location.register,location.register));
      end;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { NOTE(review): left.location.loc is inspected here before
        secondpass(left) has been called for this node - confirm that this
        is intended }
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          if UseAVX then
            begin
              case left.location.size of
                OS_F32:
                  emit_convert(A_VCVTTSS2SI);
                OS_F64:
                  emit_convert(A_VCVTTSD2SI);
                else
                  internalerror(2007031401);
              end;
            end
          else
            begin
              case left.location.size of
                OS_F32:
                  emit_convert(A_CVTTSS2SI);
                OS_F64:
                  emit_convert(A_CVTTSD2SI);
                else
                  internalerror(2007031403);
              end;
            end;
          exit;
        end;
{$endif x86_64}

      if current_settings.fputype>=fpu_sse3 then
        begin
          { FISTTP needs no control word manipulation }
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          exit;
        end;

      { classic x87 sequence: save the control word twice, modify one
        copy, convert with the modified copy active, then restore }
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
{$ifdef i8086}
      if current_settings.cputype<=cpu_286 then
        begin
          emit_ref(A_FSTCW,S_NO,newcw);
          emit_ref(A_FSTCW,S_NO,oldcw);
          emit_none(A_FWAIT,S_NO);
        end
      else
{$endif i8086}
        begin
          emit_ref(A_FNSTCW,S_NO,newcw);
          emit_ref(A_FNSTCW,S_NO,oldcw);
        end;
      emit_const_ref(A_OR,S_W,$0f00,newcw);
      load_fpu_location(left);
      emit_ref(A_FLDCW,S_NO,newcw);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_ref(A_FLDCW,S_NO,oldcw);
      emit_none(A_FWAIT,S_NO);
      tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
      tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
    end;
  { sqr(): multiplies the operand with itself. With the vector fpu the
    product ends up in a fresh mm register (three operand multiply with
    AVX, load + two operand multiply otherwise); without it FMUL st0,st0
    is emitted on the x87 stack. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            left.location.register,left.location.register,location.register,mms_movescalar);
        end
      else
        begin
          { an operand living on the x87 stack is moved to an mm register
            before it is loaded into the destination }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            location.register,location.register,mms_movescalar);
        end;
    end;
  procedure tx86inlinenode.second_sqrt_real;
    { Emits a square root: a scalar (V)SQRTSS/(V)SQRTSD if the result type is
      handled by the vector unit, FSQRT on the FPU stack otherwise. }
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        case tfloatdef(resultdef).floattype of
          s32real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
          s64real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
          else
            internalerror(2005100303);
        end
      else
        { for both AVX variants:
          - we use S_NO instead of S_XMM, regardless of the register size, as the size of the
            memory location is 32/64 bit
          - using left.location.register as 2nd parameter is crucial to break dependency chains }
        case tfloatdef(resultdef).floattype of
          s32real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register));
          s64real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register));
          else
            internalerror(200510031);
        end;
    end;
  procedure tx86inlinenode.second_ln_real;
    { Natural logarithm on the FPU stack, computed as ln(2)*log2(x). }
    begin
      load_fpu_location(left);
      { push the constant ln(2) on top of the operand }
      emit_none(A_FLDLN2,S_NO);
      { swap, so that the operand is in ST0 and ln(2) in ST1 }
      emit_none(A_FXCH,S_NO);
      { FYL2X calculates ST1*log2(ST0) }
      emit_none(A_FYL2X,S_NO);
    end;
  procedure tx86inlinenode.second_cos_real;
    { Cosine via the FCOS instruction; i8086 targets older than the 386 use
      the inherited implementation instead. }
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  procedure tx86inlinenode.second_sin_real;
    { Sine via the FSIN instruction; i8086 targets older than the 386 use
      the inherited implementation instead. }
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  procedure tx86inlinenode.second_prefetch;
    { Emits PREFETCHNTA for the address of the operand if the operand lives in
      memory; any other location needs no code. On i386/i8086 nothing is
      generated at all for CPU types below the Pentium 3. }
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { switch pointer checking on again for the left node; passing false
            here a second time would leave it disabled }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { load the effective address into a register and prefetch via
                  a plain base register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  procedure tx86inlinenode.second_abs_long;
    { Branch free abs() of an integer: with CMOV the negated value is selected
      conditionally, without it the sign mask trick is used. }
    var
      negreg : tregister;
      opsize : tcgsize;
      cmovinstr : taicpu;
    begin
      { common to both variants }
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { result:=value }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { mask:=value sar (bits-1), built in the register of the operand }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          { result:=(result xor mask)-mask }
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
        end
      else
{$endif i8086 or i386}
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          negreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { keep one copy of the value for negating and one as result }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          emit_reg(A_NEG,tcgsize2opsize[opsize],negreg);
          { take over the negated copy if the sign flag is clear after the NEG }
          cmovinstr:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negreg,location.register);
          cmovinstr.condition:=C_NS;
          current_asmdata.CurrAsmList.concat(cmovinstr);
        end;
    end;
  927. {*****************************************************************************
  928. INCLUDE/EXCLUDE GENERIC HANDLING
  929. *****************************************************************************}
  procedure tx86inlinenode.second_IncludeExclude;
    { Code generation for include(s,e)/exclude(s,e): a constant element is
      handled with an OR/AND of a bit mask, a variable element with BTS/BTR. }
    var
      setnode,
      elemnode : tnode;
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,
      bitmask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size);
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;
      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc<>LOC_CONSTANT then
        begin
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;
          if (setnode.location.loc=LOC_REFERENCE) then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if (orgsize in [OS_8,OS_S8]) then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
            end;
        end
      else
        begin
          { calculate bit position }
          bitmask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              bitmask:=not(bitmask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the operand sized chunk containing the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,bitmask,setnode.location.reference);
              end;
            LOC_CSUBSETREG,
            LOC_CREGISTER :
              hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,setnode.resultdef,bitmask,setnode.location);
            else
              internalerror(200405022);
          end;
        end;
    end;
  procedure tx86inlinenode.second_popcnt;
    { Emits a POPCNT instruction on a register or memory operand. }
    var
      cntsize: tcgsize;
    begin
      secondpass(left);
      cntsize:=tcgsize2unsigned[left.location.size];
      { no 8 Bit popcnt, count byte sized operands in 16 bit }
      if cntsize=OS_8 then
        cntsize:=OS_16;
      { the operand must be a register or a memory location of exactly the counting size }
      if (left.location.size<>cntsize) or
         not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);
      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);
      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
        emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register)
      else
        emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
      { a byte sized result is handed back in the 8 bit sub register }
      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  procedure tx86inlinenode.second_fma;
{$ifndef i8086}
    const
      { opcode table, indexed by
          [product is negated, third operand is negated, float type, operand layout]
        layout 1..3 is the number of the parameter used as memory operand (3 is also used
        when the result register is preloaded with one of the factors), layout 0 is used
        when the result register is preloaded with the third operand and no memory
        operand is involved }
      op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
          { positive product }
          (
            { positive third operand }
            ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
             (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
            ),
            { negative third operand }
            ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
             (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
            )
          ),
          { negative product }
          (
            { positive third operand }
            ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
             (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
            ),
            { negative third operand }
            ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
             (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
            )
          )
        );

    var
      paraarray : array[1..3] of tnode;
      memop,
      i : integer;
      negop3,
      negproduct,
      gotmem : boolean;
{$endif i8086}
    begin
{$ifndef i8086}
      if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;
          { the parameter list is stored in reverse order }
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[3]:=tcallparanode(parameters).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            do not release the unused unary minus node, it is kept and released together
            with the other nodes, only no code is generated for it }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  { a negated addend selects the SUB flavour }
                  negop3:=true
                else
                  { every negated factor flips the sign of the product }
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed }
          gotmem:=false;
          memop:=0;

          { in case parameters come on the FPU stack, we have to pop them in reverse order as we
            called secondpass }
          for i:=3 downto 1 do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                begin
                  if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                    begin
                      memop:=i;
                      gotmem:=true;
                    end
                  else
                    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
                end;
            end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              case memop of
                1,2:
                  begin
                    { one factor is in memory: preload the result register with the third
                      operand, the other factor (parameter 3-memop) is the register source }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                      paraarray[3].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[memop].location.reference,paraarray[3-memop].location.register,location.register);
                  end;
                3:
                  begin
                    { the third operand is in memory: preload the result register with the first factor }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                      paraarray[1].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[3].location.reference,paraarray[2].location.register,location.register);
                  end
                else
                  internalerror(2014041301);
              end;
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                    paraarray[3].location.register,paraarray[2].location.register,location.register);
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
                    paraarray[2].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                    paraarray[3].location.register,paraarray[1].location.register,location.register);
                end
              else
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                    paraarray[3].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
                    paraarray[1].location.register,paraarray[2].location.register,location.register);
                end;
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  procedure tx86inlinenode.second_frac_real;
    { frac() in an MM register: the value minus the value rounded with rounding
      mode 3, or a single VREDUCESS/VREDUCESD if AVX512DQ is available.
      Only the vector unit is supported, anything else is an internal error. }
    var
      roundedreg : TRegister;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if not UseAVX then
            begin
              { result:=value; roundedreg:=round(value); result:=result-roundedreg }
              roundedreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,roundedreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,roundedreg,location.register));
                  end;
                s64real:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,roundedreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,roundedreg,location.register));
                  end;
                else
                  internalerror(2017052103);
              end;
            end
          else
            case tfloatdef(left.resultdef).floattype of
              s32real:
                begin
{$ifndef i8086}
                  if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,left.location.register,left.location.register,location.register))
                  else
{$endif not i8086}
                    begin
                      { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
                      { result:=value-rounded value }
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
                    end;
                end;
              s64real:
                begin
{$ifndef i8086}
                  if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,left.location.register,left.location.register,location.register))
                  else
{$endif not i8086}
                    begin
                      { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
                      { result:=value-rounded value }
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
                    end;
                end;
              else
                internalerror(2017052102);
            end;
          { convert in place if operand and result float type differ }
          if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
        end;
    end;
  procedure tx86inlinenode.second_int_real;
    { int() in an MM register by means of (V)ROUNDSS/(V)ROUNDSD with rounding
      mode 3. Only the vector unit is supported, anything else is an internal
      error. }
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if not UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,location.register));
                s64real:
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,location.register));
                else
                  internalerror(2017052106);
              end;
            end
          else
            { for both AVX variants: using left.location.register as 3rd parameter is
              crucial to break dependency chains }
            case tfloatdef(resultdef).floattype of
              s32real:
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
              s64real:
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
              else
                internalerror(2017052105);
            end;
        end;
    end;
  procedure tx86inlinenode.second_high;
    { high() of a dynamic array without a compare instruction: a nil array
      yields -1, otherwise the value stored directly in front of the array data
      is loaded. Any other operand type is an internal error. }
    var
      nillab : tasmlabel;
      highreg : tregister;
      href : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122809);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(nillab);
      { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
        flag is set in this case, so we can jump depending on it

        when loading the actual high value, we have to take care later of the decreased value

        do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
        we are using is not working as dec does not touch the carry flag }
      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,nillab);
      { the value is located ossinttype.size bytes in front of the array data, the +1 compensates
        for the decrement above

        volatility of the dyn. array refers to the volatility of the
        array pointer, not of the array data, so the reference gets an empty volatility set }
      hlcg.reference_reset_base(href,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
      { reuse the register that originally held the array pointer for the result, so that
        the -1 calculated above is kept in it if the array pointer is nil }
      highreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,href,highreg);
      cg.a_label(current_asmdata.CurrAsmList,nillab);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=highreg;
    end;
  procedure tx86inlinenode.second_minmax;
{$ifndef i8086}
    const
      { indexed by [max instead of min, AVX encoding, float type] }
      oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
        (
          (
            (A_MINSS,A_MINSD),
            (A_VMINSS,A_VMINSD)
          ),
          (
            (A_MAXSS,A_MAXSD),
            (A_VMAXSS,A_VMAXSD)
          )
        );
    var
      paraarray : array[1..2] of tnode;
      memop,
      i : integer;
      gotmem : boolean;
      op: TAsmOp;
{$endif i8086}
    begin
{$ifndef i8086}
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        is_single(resultdef) or is_double(resultdef)
{$endif i386}
        then
        begin
          { the parameter list is stored in reverse order }
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          { only one memory operand is allowed }
          gotmem:=false;
          memop:=0;
          for i:=low(paraarray) to high(paraarray) do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                begin
                  if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                    begin
                      memop:=i;
                      gotmem:=true;
                    end
                  else
                    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
                end;
            end;

          { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
            one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
          if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
            begin
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
              gotmem:=false;
            end;

          op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              { paraarray[memop] is the memory operand, paraarray[3-memop] the one in a register }
              if UseAVX then
                case memop of
                  1,2:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[memop].location.reference,paraarray[3-memop].location.register,location.register);
                  else
                    internalerror(2020120504);
                end
              else
                case memop of
                  1,2:
                    begin
                      { two operand form: copy the register operand into the result first }
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3-memop].resultdef,resultdef,
                        paraarray[3-memop].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[memop].location.reference,location.register);
                    end;
                  else
                    internalerror(2020120601);
                end;
            end
          else
            begin
              if not UseAVX then
                begin
                  { two operand form: copy the first operand into the result first }
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg(op,S_NO,
                    paraarray[2].location.register,location.register);
                end
              else
                emit_reg_reg_reg(op,S_NO,
                  paraarray[2].location.register,paraarray[1].location.register,location.register);
            end;
        end
      else
{$endif i8086}
        internalerror(2020120503);
    end;
  1438. end.