nx86inl.pas 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { Inline node for x86 targets: overrides the first-pass (first_*) and
    code-generation (second_*) hooks of tcginlinenode for the intrinsics
    declared below. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { called from first_round_real, first_trunc_real and first_frac_real;
      the implementation in this unit is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    function pass_typecheck_cpu:tnode;override;

    { first pass override
      so that the code generator will actually generate
      these nodes.
    }
    function first_cpu: tnode;override;
    function first_pi: tnode ; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real : tnode; override;
    function first_int_real : tnode; override;
    function first_minmax: tnode; override;
    function simplify(forinline : boolean) : tnode; override;

    { second pass override to generate these nodes }
    procedure pass_generate_code_cpu;override;
    procedure second_IncludeExclude;override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch;override;
    procedure second_abs_long;override;
    procedure second_popcnt;override;
    procedure second_fma;override;
    procedure second_frac_real;override;
    procedure second_int_real;override;
    procedure second_high;override;
    procedure second_minmax;override;
  private
    { evaluates lnode and sets this node's location to an x87 register }
    procedure load_fpu_location(lnode: tnode);
  end;
  73. implementation
  74. uses
  75. systems,
  76. globtype,globals,
  77. verbose,compinnr,fmodule,
  78. defutil,
  79. aasmbase,aasmdata,aasmcpu,
  80. symconst,symtype,symdef,symcpu,
  81. ncnv,
  82. htypechk,
  83. cgbase,pass_1,pass_2,
  84. cpuinfo,cpubase,nutils,
  85. ncal,ncgutil,nld,ncon,
  86. tgobj,
  87. cga,cgutils,cgx86,cgobj,hlcgobj;
  88. {*****************************************************************************
  89. TX86INLINENODE
  90. *****************************************************************************}
  { Deliberately a no-op in this class; it is virtual so that a descendant
    can supply real behaviour. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
  begin
    { only makes a difference for x86_64 }
  end;
  { Type-check pass for the x86-only intrinsics.
    Sets resultdef for the port I/O, cli/sti and segment-register intrinsics
    (calling CheckParameters for the port intrinsics); the cases pulled in
    from x86mmtype.inc are handled by that include, and every other inline
    number is delegated to the inherited implementation.
    Returns nil for the cases handled here, otherwise the inherited result. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
  begin
    Result:=nil;
    case inlinenumber of
      { port reads: one parameter, result width follows the intrinsic }
      in_x86_inportb:
        begin
          CheckParameters(1);
          resultdef:=u8inttype;
        end;
      in_x86_inportw:
        begin
          CheckParameters(1);
          resultdef:=u16inttype;
        end;
      in_x86_inportl:
        begin
          CheckParameters(1);
          resultdef:=s32inttype;
        end;

      { port writes: two parameters, no result }
      in_x86_outportb,
      in_x86_outportw,
      in_x86_outportl:
        begin
          CheckParameters(2);
          resultdef:=voidtype;
        end;

      in_x86_cli,
      in_x86_sti:
        resultdef:=voidtype;

      { segment register reads }
      in_x86_get_cs,
      in_x86_get_ss,
      in_x86_get_ds,
      in_x86_get_es,
      in_x86_get_fs,
      in_x86_get_gs:
{$ifdef i8086}
        resultdef:=u16inttype;
{$else i8086}
        resultdef:=s32inttype;
{$endif i8086}

      { include automatically generated code }
{$i x86mmtype.inc}
      else
        Result:=inherited pass_typecheck_cpu;
    end;
  end;
  { First pass for the x86-only intrinsics: records the expected location
    of the result. Intrinsics that yield a value expect LOC_REGISTER, the
    ones without a value expect LOC_VOID; the cases from x86mmfirst.inc are
    handled by that include and anything else goes to the inherited method. }
  function tx86inlinenode.first_cpu: tnode;
  begin
    Result:=nil;
    case inlinenumber of
      { value-producing intrinsics }
      in_x86_inportb,
      in_x86_inportw,
      in_x86_inportl,
      in_x86_get_cs,
      in_x86_get_ss,
      in_x86_get_ds,
      in_x86_get_es,
      in_x86_get_fs,
      in_x86_get_gs:
        expectloc:=LOC_REGISTER;

      { intrinsics without a result }
      in_x86_outportb,
      in_x86_outportw,
      in_x86_outportl,
      in_x86_cli,
      in_x86_sti:
        expectloc:=LOC_VOID;

      { include automatically generated code }
{$i x86mmfirst.inc}
      else
        Result:=inherited first_cpu;
    end;
  end;
  { First pass for pi: handled by this node (result expected in an FPU
    register) only when the best real type is s80real; otherwise the
    inherited implementation decides. }
  function tx86inlinenode.first_pi : tnode;
  begin
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for arctan: generated inline (FPU register result) when the
    best real type is s80real, otherwise left to the inherited method. }
  function tx86inlinenode.first_arctan_real : tnode;
  begin
{$ifdef i8086}
    { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
      so we need to use the RTL helper on these FPUs }
    if current_settings.cputype < cpu_386 then
      begin
        Result:=inherited;
        exit;
      end;
{$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for abs() on reals: always handled by this node; the result
    is expected in an MM register when the vector FPU is used for resultdef
    and in an x87 register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
  begin
    Result:=nil;
    if use_vectorfpu(resultdef) then
      expectloc:=LOC_MMREGISTER
    else
      expectloc:=LOC_FPUREGISTER;
  end;
  { First pass for sqr() on reals: always handled by this node; expected
    location is an MM register with the vector FPU, an x87 register
    otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
  begin
    Result:=nil;
    if use_vectorfpu(resultdef) then
      expectloc:=LOC_MMREGISTER
    else
      expectloc:=LOC_FPUREGISTER;
  end;
  { First pass for sqrt() on reals: always handled by this node; expected
    location is an MM register with the vector FPU, an x87 register
    otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
  begin
    Result:=nil;
    if use_vectorfpu(resultdef) then
      expectloc:=LOC_MMREGISTER
    else
      expectloc:=LOC_FPUREGISTER;
  end;
  { First pass for ln(): generated inline (FPU register result) only when
    the best real type is s80real; otherwise the inherited method decides. }
  function tx86inlinenode.first_ln_real : tnode;
  begin
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for cos(): generated inline (FPU register result) when the
    best real type is s80real, otherwise left to the inherited method. }
  function tx86inlinenode.first_cos_real : tnode;
  begin
{$ifdef i8086}
    { FCOS is 387+ }
    if current_settings.cputype < cpu_386 then
      begin
        Result:=inherited;
        exit;
      end;
{$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for sin(): generated inline (FPU register result) when the
    best real type is s80real, otherwise left to the inherited method. }
  function tx86inlinenode.first_sin_real : tnode;
  begin
{$ifdef i8086}
    { FSIN is 387+ }
    if current_settings.cputype < cpu_386 then
      begin
        Result:=inherited;
        exit;
      end;
{$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for round(): always handled by this node. The result is
    expected in memory (LOC_REFERENCE), except on x86_64 with a vector-FPU
    operand where it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
  begin
    { must run first: an override may replace left }
    maybe_remove_round_trunc_typeconv;
    Result:=nil;
    expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      expectloc:=LOC_REGISTER;
{$endif x86_64}
  end;
  { First pass for trunc(). When optimizing for size (and, on x86_64, the
    operand does not use the vector FPU) the inherited implementation is
    used; otherwise the node is handled here with the same expected
    locations as first_round_real. }
  function tx86inlinenode.first_trunc_real: tnode;
  begin
    { must run first: an override may replace left }
    maybe_remove_round_trunc_typeconv;
    if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
       and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
    then
      begin
        Result:=inherited;
        exit;
      end;
    Result:=nil;
    expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      expectloc:=LOC_REGISTER;
{$endif x86_64}
  end;
  { First pass for popcnt: handled inline (register result) when the
    selected CPU type has CPUX86_HAS_POPCNT and, on i386, the operand is
    not 64 bit; in every other case (always on i8086) the inherited
    implementation is used. }
  function tx86inlinenode.first_popcnt: tnode;
  begin
    Result:=nil;
{$ifndef i8086}
    if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
       and not is_64bit(left.resultdef)
{$endif i386}
    then
      begin
        expectloc:=LOC_REGISTER;
        exit;
      end;
{$endif not i8086}
    Result:=inherited first_popcnt;
  end;
  { First pass for fma: handled inline (MM register result) when the CPU
    type offers FMA or FMA4 and the result is single or double; otherwise
    (always on i8086) the inherited implementation is used. }
  function tx86inlinenode.first_fma : tnode;
  begin
{$ifndef i8086}
    if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
       ((is_double(resultdef)) or (is_single(resultdef))) then
      begin
        expectloc:=LOC_MMREGISTER;
        Result:=nil;
        exit;
      end;
{$endif not i8086}
    Result:=inherited first_fma;
  end;
  { First pass for frac(): handled inline (MM register result) when the
    FPU type is at least fpu_sse41 and the result is single or double;
    otherwise the inherited implementation is used. }
  function tx86inlinenode.first_frac_real : tnode;
  begin
    if not((current_settings.fputype>=fpu_sse41) and
           ((is_double(resultdef)) or (is_single(resultdef)))) then
      begin
        Result:=inherited first_frac_real;
        exit;
      end;
    maybe_remove_round_trunc_typeconv;
    expectloc:=LOC_MMREGISTER;
    Result:=nil;
  end;
  { First pass for int(): handled inline (MM register result) when the
    FPU type is at least fpu_sse41 and the result is single or double;
    otherwise the inherited implementation is used. }
  function tx86inlinenode.first_int_real : tnode;
  begin
    if not((current_settings.fputype>=fpu_sse41) and
           ((is_double(resultdef)) or (is_single(resultdef)))) then
      begin
        Result:=inherited first_int_real;
        exit;
      end;
    Result:=nil;
    expectloc:=LOC_MMREGISTER;
  end;
  { First pass for min/max: handled inline (MM register result) for single
    and double results. On i386 this additionally requires fpu_sse (single)
    or fpu_sse2 (double); on i8086 the inherited implementation is always
    used. }
  function tx86inlinenode.first_minmax: tnode;
  begin
{$ifndef i8086}
    if
{$ifdef i386}
       ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
       ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
       ((is_double(resultdef)) or (is_single(resultdef)))
{$endif i386}
    then
      begin
        expectloc:=LOC_MMREGISTER;
        Result:=nil;
        exit;
      end;
{$endif not i8086}
    Result:=inherited first_minmax;
  end;
  { Simplification hook. For in_int_real with fpu_sse41 or better, an
    implicit type conversion wrapped around a single/double operand is
    stripped and a type-checked copy of the node with the operand's type as
    resultdef is returned. All other cases go to the inherited method. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
  var
    operand : tnode;
  begin
    { the tests rely on short-circuit evaluation: ttypeconvnode(left) is
      only touched once left is known to be a typeconvn }
    if not((current_settings.fputype>=fpu_sse41) and
           (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
           not(nf_explicit in left.flags) and
           (ttypeconvnode(left).left.resultdef.typ=floatdef) and
           ((is_double(ttypeconvnode(left).left.resultdef)) or
            (is_single(ttypeconvnode(left).left.resultdef)))) then
      begin
        Result:=inherited simplify(forinline);
        exit;
      end;

    { get rid of the type conversion: detach its operand before freeing it }
    operand:=ttypeconvnode(left).left;
    ttypeconvnode(left).left:=nil;
    left.free;
    left:=operand;

    Result:=self.getcopy;
    tinlinenode(Result).resultdef:=operand.resultdef;
    typecheckpass(Result);
  end;
  { Code generation for the x86-only intrinsics (port I/O, cli/sti, segment
    register reads) plus the cases pulled in from x86mmsecond.inc; anything
    else is delegated to the inherited implementation.
    The nested helpers and the locals paraarray/i/op are kept available for
    the included code. }
  procedure tx86inlinenode.pass_generate_code_cpu;
  var
    paraarray : array[1..4] of tnode;
    i : integer;
    op: TAsmOp;

    { Emits an IN from the port given by left into dreg and copies dreg into
      a freshly allocated result register. A constant port number in 0..255
      is encoded as an immediate, anything else is loaded into DX first. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
    var
      portnumber: tnode;
    begin
      portnumber:=left;
      secondpass(portnumber);
      if (portnumber.location.loc=LOC_CONSTANT) and
         (portnumber.location.value>=0) and
         (portnumber.location.value<=255) then
        begin
          hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnumber.location.value,dreg));
        end
      else
        begin
          hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
          hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
          hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
        end;
      { common tail: move the value out of the fixed register }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
    end;

    { Emits an OUT of the data parameter (loaded into dreg) to the port
      parameter; the port is an immediate when it is a constant in 0..255,
      otherwise it is passed in DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
    var
      portnumber, portdata: tnode;
    begin
      portnumber:=tcallparanode(tcallparanode(left).right).left;
      portdata:=tcallparanode(left).left;
      secondpass(portdata);
      secondpass(portnumber);
      hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portdata.resultdef,dtype,portdata.location,dreg);
      if (portnumber.location.loc=LOC_CONSTANT) and
         (portnumber.location.value>=0) and
         (portnumber.location.value<=255) then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnumber.location.value))
      else
        begin
          hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
          hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
        end;
      hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
    end;

    { Copies segment register segreg into a new integer register that
      becomes the node's location. }
    procedure get_segreg(segreg:tregister);
    var
      ressize: tcgsize;
    begin
      ressize:=def_cgsize(resultdef);
      location_reset(location,LOC_REGISTER,ressize);
      location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[ressize],segreg,location.register));
    end;

    { Returns the value of an integer constant node; for any other node the
      error type_e_constant_expr_expected is reported and 0 is returned. }
    function GetConstInt(n: tnode): longint;
    begin
      if is_constintnode(n) then
        Result:=tordconstnode(n).value.svalue
      else
        begin
          Result:=0;
          Message(type_e_constant_expr_expected);
        end;
    end;

    { Fills paraarray[1..count] from left. The parameter chain is walked
      from left via nextpara while the array index runs from count down
      to 1. A single parameter that is not wrapped in a tcallparanode is
      stored directly. }
    procedure GetParameters(count: longint);
    var
      idx: longint;
      para: tnode;
    begin
      if (count=1) and
         (not (left is tcallparanode)) then
        begin
          paraarray[1]:=left;
          exit;
        end;
      para:=left;
      for idx := count downto 1 do
        begin
          paraarray[idx]:=tcallparanode(para).paravalue;
          para:=tcallparanode(para).nextpara;
        end;
    end;

    { Ensures l lives in an MMX register. Nothing is done when l already is
      LOC_MMXREGISTER, or LOC_CMMXREGISTER with maybeconst set. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
    var
      reg : tregister;
    begin
      if (l.loc=LOC_MMXREGISTER) or
         ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
        exit;
      reg:=tcgx86(cg).getmmxregister(list);
      cg.a_loadmm_loc_reg(list,OS_M64,l,reg,nil);
      location_freetemp(list,l);
      location_reset(l,LOC_MMXREGISTER,OS_M64);
      l.register:=reg;
    end;

    { Turns loc into a memory reference. Existing references are left
      untouched; a register becomes the base of a new reference, and any
      other location is first forced into a 32 bit unsigned register. }
    procedure location_make_ref(var loc: tlocation);
    var
      hloc: tlocation;
    begin
      if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
        exit;
      if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
      location_reset_ref(hloc, LOC_REFERENCE, OS_32, 1, []);
      hloc.reference.base:=loc.register;
      loc:=hloc;
    end;

  begin
    FillChar(paraarray,sizeof(paraarray),0);
    case inlinenumber of
      in_x86_inportb:
        inport(NR_AL,S_B,u8inttype);
      in_x86_inportw:
        inport(NR_AX,S_W,u16inttype);
      in_x86_inportl:
        inport(NR_EAX,S_L,s32inttype);
      in_x86_outportb:
        outport(NR_AL,S_B,u8inttype);
      in_x86_outportw:
        outport(NR_AX,S_W,u16inttype);
      in_x86_outportl:
        outport(NR_EAX,S_L,s32inttype);
      in_x86_cli:
        current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
      in_x86_sti:
        current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
      in_x86_get_cs:
        get_segreg(NR_CS);
      in_x86_get_ss:
        get_segreg(NR_SS);
      in_x86_get_ds:
        get_segreg(NR_DS);
      in_x86_get_es:
        get_segreg(NR_ES);
      in_x86_get_fs:
        get_segreg(NR_FS);
      in_x86_get_gs:
        get_segreg(NR_GS);
{$i x86mmsecond.inc}
      else
        inherited pass_generate_code_cpu;
    end;
  end;
  { Generates pi: emits FLDPI, accounts for the extra x87 stack entry and
    reports the result in NR_FPU_RESULT_REG. }
  procedure tx86inlinenode.second_pi;
  begin
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_FPU_RESULT_REG;
    emit_none(A_FLDPI,S_NO);
    tcgx86(cg).inc_fpu_stack;
  end;
  560. { evaluate lnode and load its value into an FPU register }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
  begin
    { the node's own location is set up first, then lnode is evaluated and
      its value is brought into NR_FPU_RESULT_REG according to where
      lnode ended up }
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_FPU_RESULT_REG;
    secondpass(lnode);
    case lnode.location.loc of
      LOC_FPUREGISTER:
        { nothing to emit }
        ;
      LOC_CFPUREGISTER:
        cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.register,location.register);
      LOC_REFERENCE,
      LOC_CREFERENCE:
        cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.reference,location.register);
      LOC_MMREGISTER,
      LOC_CMMREGISTER:
        begin
          { take over lnode's location and let the code generator move it
            to an FPU register }
          location:=lnode.location;
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
        end;
      else
        internalerror(309991);
    end;
  end;
  { Generates arctan: loads the operand onto the x87 stack, pushes 1.0
    (FLD1) and emits FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
  begin
    load_fpu_location(left);
    emit_none(A_FLD1,S_NO);
    emit_none(A_FPATAN,S_NO);
  end;
  { Generates abs() for reals. With the vector FPU the operand is ANDed
    with the external mask symbol FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE
    (prefixed with target_info.cprefix); otherwise FABS is emitted. }
  procedure tx86inlinenode.second_abs_real;

    function needs_indirect:boolean; inline;
    begin
      result:=(tf_supports_packages in target_info.flags) and
              (target_info.system in systems_indirect_var_imports);
    end;

    { References the mask symbol cprefix+maskname, registers it as external
      and emits the AND: the three-operand avxop when UseAVX, else the
      two-operand sseop on location.register. }
    procedure emit_and_mask(const maskname: string; avxop,sseop: tasmop);
    var
      href : treference;
      sym : tasmsymbol;
    begin
      sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
      reference_reset_symbol(href,sym,0,4,[]);
      current_module.add_extern_asmsym(sym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avxop,S_XMM,href,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,href,location.register));
    end;

  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_none(A_FABS,S_NO);
        exit;
      end;

    secondpass(left);
    if left.location.loc<>LOC_MMREGISTER then
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);

    { with AVX the result gets its own register; without it the operation
      is done in place on the operand's location }
    if UseAVX then
      begin
        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
      end
    else
      location:=left.location;

    case tfloatdef(resultdef).floattype of
      s32real:
        emit_and_mask('FPC_ABSMASK_SINGLE',A_VANDPS,A_ANDPS);
      s64real:
        emit_and_mask('FPC_ABSMASK_DOUBLE',A_VANDPD,A_ANDPD);
      else
        internalerror(200506081);
    end;
  end;
  { Generates round(). On x86_64 with a vector-FPU operand a (V)CVTSS2SI /
    (V)CVTSD2SI into a 64 bit integer register is emitted; otherwise the
    operand is loaded on the x87 stack and stored with FISTP into a temp. }
  procedure tx86inlinenode.second_round_real;
  begin
{$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      begin
        secondpass(left);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location_reset(location,LOC_REGISTER,OS_S64);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
        case left.location.size of
          OS_F32:
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_NO,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_NO,left.location.register,location.register));
          OS_F64:
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_NO,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_NO,left.location.register,location.register));
          else
            { distinct error numbers for the AVX and the SSE path }
            if UseAVX then
              internalerror(2007031402)
            else
              internalerror(2007031404);
        end;
      end
    else
{$endif x86_64}
      begin
        load_fpu_location(left);
        location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
        tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
        { FISTP pops the x87 stack, so the tracked stack depth goes down }
        emit_ref(A_FISTP,S_IQ,location.reference);
        tcgx86(cg).dec_fpu_stack;
        emit_none(A_FWAIT,S_NO);
      end;
  end;
  { Generates trunc(). Three strategies:
    - x86_64 with a vector-FPU operand: (V)CVTTSS2SI / (V)CVTTSD2SI into a
      64 bit integer register;
    - fputype >= fpu_sse3: x87 load followed by FISTTP into a temp;
    - otherwise: the x87 control word is saved, a modified copy is loaded
      around a FISTP, and the saved control word is restored afterwards. }
  procedure tx86inlinenode.second_trunc_real;
  var
    savedcw,trunccw : treference;
  begin
{$ifdef x86_64}
    { NOTE(review): left.location is inspected here before secondpass(left)
      has been called in this procedure - confirm this is intended }
    if use_vectorfpu(left.resultdef) and
       not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
      begin
        secondpass(left);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location_reset(location,LOC_REGISTER,OS_S64);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
        case left.location.size of
          OS_F32:
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
          OS_F64:
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
          else
            { distinct error numbers for the AVX and the SSE path }
            if UseAVX then
              internalerror(2007031401)
            else
              internalerror(2007031403);
        end;
        exit;
      end;
{$endif x86_64}

    if (current_settings.fputype>=fpu_sse3) then
      begin
        load_fpu_location(left);
        location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
        tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
        emit_ref(A_FISTTP,S_IQ,location.reference);
        tcgx86(cg).dec_fpu_stack;
        exit;
      end;

    { two 16 bit temps: the original control word and the modified copy }
    tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,savedcw);
    tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunccw);
{$ifdef i8086}
    if current_settings.cputype<=cpu_286 then
      begin
        emit_ref(A_FSTCW,S_NO,trunccw);
        emit_ref(A_FSTCW,S_NO,savedcw);
        emit_none(A_FWAIT,S_NO);
      end
    else
{$endif i8086}
      begin
        emit_ref(A_FNSTCW,S_NO,trunccw);
        emit_ref(A_FNSTCW,S_NO,savedcw);
      end;
    { $0f00 sets bits 8..11 of the control word (the precision-control and
      rounding-control fields), which selects round-toward-zero }
    emit_const_ref(A_OR,S_W,$0f00,trunccw);
    load_fpu_location(left);
    emit_ref(A_FLDCW,S_NO,trunccw);
    location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
    tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
    emit_ref(A_FISTP,S_IQ,location.reference);
    tcgx86(cg).dec_fpu_stack;
    { restore the caller's control word }
    emit_ref(A_FLDCW,S_NO,savedcw);
    emit_none(A_FWAIT,S_NO);
    tg.UnGetTemp(current_asmdata.CurrAsmList,savedcw);
    tg.UnGetTemp(current_asmdata.CurrAsmList,trunccw);
  end;
  { Generates sqr(). x87 path: load the operand and multiply ST0 by itself.
    Vector path: multiply the operand by itself into a new MM register,
    with the three-operand form when UseAVX and load-then-multiply
    otherwise. }
  procedure tx86inlinenode.second_sqr_real;
  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
        exit;
      end;

    secondpass(left);
    location_reset(location,LOC_MMREGISTER,left.location.size);
    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
    if UseAVX then
      begin
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
          left.location.register,left.location.register,location.register,mms_movescalar);
      end
    else
      begin
        { an operand sitting in an x87 register is moved to an MM register
          before it is loaded into the result register }
        if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
        cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
          location.register,location.register,mms_movescalar);
      end;
  end;
  { Generates code for the square root of the floating point operand in left.
    Uses (V)SQRTSS/(V)SQRTSD when use_vectorfpu accepts the result type and
    FSQRT otherwise. Raises an internal error if the result type is neither
    s32real nor s64real on the vector path. }
  procedure tx86inlinenode.second_sqrt_real;
    var
      sqrtop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              sqrtop:=A_VSQRTSS;
            s64real:
              sqrtop:=A_VSQRTSD;
            else
              internalerror(200510031);
          end;
          { S_NO is used instead of S_XMM, regardless of the register size, because
            the size of the memory location is 32/64 bit.
            Passing left.location.register as 2nd parameter as well is crucial to
            break dependency chains. }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrtop,S_NO,left.location.register,left.location.register,location.register));
        end
      else
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              sqrtop:=A_SQRTSS;
            s64real:
              sqrtop:=A_SQRTSD;
            else
              internalerror(2005100303);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrtop,S_NO,left.location.register,location.register));
        end;
    end;
  { Generates x87 code for the natural logarithm of the operand in left,
    using ln(x) = ln(2)*log2(x). }
  procedure tx86inlinenode.second_ln_real;
    begin
      { assumes load_fpu_location leaves the operand in ST0 }
      load_fpu_location(left);
      { push the constant ln(2); the operand moves to ST1 }
      emit_none(A_FLDLN2,S_NO);
      { swap both so the operand is on top again and ln(2) sits below it }
      emit_none(A_FXCH,S_NO);
      { FYL2X multiplies ST1 with log2(ST0) and pops once }
      emit_none(A_FYL2X,S_NO);
    end;
  { Generates an FCOS for the operand in left. On i8086 targets with a cpu
    type below cpu_386 the inherited implementation is used instead, because
    FCOS is 387+. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Generates an FSIN for the operand in left. On i8086 targets with a cpu
    type below cpu_386 the inherited implementation is used instead, because
    FSIN is 387+. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Generates a PREFETCHNTA for the memory location denoted by left.
    Nothing is emitted when left does not end up in a (constant) reference
    or, on i386/i8086, when the selected cpu type is below cpu_Pentium3. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node: the switch is cleared on the
            left subtree for the duration of its code generation only }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch; passing false here again would leave the subtree
            permanently without cs_checkpointer }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { materialise the address in a register so the prefetch uses a
                  plain base register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates branch free code for the absolute value of the integer operand
    in left. With CMOV available the sequence is NEG + CMOVNS; on i8086/i386
    cpu types without CMOV a SAR/XOR/SUB sequence is used. }
  procedure tx86inlinenode.second_abs_long;
    var
      negated : tregister;
      opsize : tcgsize;
      cmov : taicpu;
    begin
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          { NOTE(review): the last argument is false here but true in the CMOV
            path; presumably this requests a private register because the
            register of left is overwritten below - confirm against
            location_force_reg }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { result:=value }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { shift the operand register right arithmetically by (bit width - 1) }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          { combine the shifted value into the result with xor, then with sub }
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
        end
      else
{$endif i8086 or i386}
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          negated:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { keep the operand in the result register and a second copy in negated }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negated);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          emit_reg(A_NEG,tcgsize2opsize[opsize],negated);
          { take the negated copy if the sign flag is clear after the NEG }
          cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negated,location.register);
          cmov.condition:=C_NS;
          current_asmdata.CurrAsmList.concat(cmov);
        end;
    end;
  927. {*****************************************************************************
  928. INCLUDE/EXCLUDE GENERIC HANDLING
  929. *****************************************************************************}
  { Generates code for include(set,element) and exclude(set,element).
    A constant element is handled with an OR/AND of a bit mask; a variable
    element is handled with BTS/BTR. On i8086 with a cpu type below cpu_386
    the inherited implementation is used. }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize: tcgsize;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      if not is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end
      else
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size)
        end;
      bitsperop:=(8*tcgsize2size[opsize]);
      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { bit position inside one operand sized chunk }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { include sets the bit with OR, exclude clears it with AND not(mask) }
          if inlinenumber<>in_include_x_y then
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end
          else
            cgop:=OP_OR;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the chunk that holds the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,setnode.location.size,mask,setnode.location.register);
            else
              internalerror(200405022);
          end;
          exit;
        end;

      { variable element: the bit instructions are emitted with a 32 bit
        operand when the set operand is only 8 bit wide }
      orgsize:=opsize;
      if opsize in [OS_8,OS_S8] then
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      { determine asm operator }
      if inlinenumber<>in_include_x_y then
        asmop:=A_BTR
      else
        asmop:=A_BTS;
      hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
      register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
      bitreg:=elemnode.location.register;
      if setnode.location.loc<>LOC_REFERENCE then
        begin
          { second argument can't be an 8 bit register either }
          setreg:=setnode.location.register;
          if (orgsize in [OS_8,OS_S8]) then
            setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
          emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
        end
      else
        emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference);
    end;
  { Generates a POPCNT for the operand in left. 8 bit operands are widened
    to 16 bit for the instruction; when the result type is one byte wide the
    result register is narrowed to OS_8 afterwards. }
  procedure tx86inlinenode.second_popcnt;
    var
      opsize: tcgsize;
    begin
      secondpass(left);
      opsize:=tcgsize2unsigned[left.location.size];
      { there is no 8 bit popcnt }
      if opsize=OS_8 then
        opsize:=OS_16;
      { the operand is usable as is only if it already has the operation size
        and lives in a register or in memory }
      if (left.location.size<>opsize) or
         not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
      location_reset(location,LOC_REGISTER,opsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
      end;
      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  { Generates a scalar fused multiply-add for the three parameters.
    Unary minus nodes directly on the parameters are not evaluated; they are
    folded into the choice between the VFMADD/VFMSUB/VFNMADD/VFNMSUB
    variants instead. At most one parameter is used as memory operand.
    Raises internal error 2014032301 when the selected cpu type has neither
    CPUX86_HAS_FMA nor CPUX86_HAS_FMA4 (always on i8086). }
  procedure tx86inlinenode.second_fma;
    const
      { indexed by: product negated, third operand negated, float type and
        the operand form, where forms 0..2 select the 231 encoding and
        form 3 the 213 encoding }
      fmaop : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
         { positive product }
         (
          { positive third operand }
          ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
           (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
          ),
          { negative third operand }
          ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
           (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
          )
         ),
         { negative product }
         (
          { positive third operand }
          ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
           (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
          ),
          { negative third operand }
          ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
           (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
          )
         )
        );
    var
      paraarray : array[1..3] of tnode;
      memop,
      accidx,
      firstidx,
      secondidx,
      opidx,
      i : integer;
      negop3,
      negproduct,
      gotmem : boolean;
    begin
{$ifndef i8086}
      if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
        begin
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[3]:=tcallparanode(parameters).paravalue;

          { A unary minus on a parameter only flips its sign, so it can be
            dropped here and expressed through the instruction variant.
            The skipped unary minus nodes are not released here; they stay in
            the tree and are released together with the other nodes, only no
            code is generated for them. }
          negproduct:=false;
          for i:=1 to 2 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                negproduct:=not(negproduct);
              end;
          negop3:=paraarray[3].nodetype=unaryminusn;
          if negop3 then
            paraarray[3]:=tunarynode(paraarray[3]).left;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed }
          gotmem:=false;
          memop:=0;

          { in case parameters come on the FPU stack, they have to be popped in
            the reverse order of the secondpass calls }
          for i:=3 downto 1 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if not(gotmem) and (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  begin
                    memop:=i;
                    gotmem:=true;
                  end
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              { accidx: parameter copied into the result register first,
                secondidx: parameter passed as register source,
                the memory parameter itself is the first source operand }
              case memop of
                1:
                  begin
                    accidx:=3;
                    secondidx:=2;
                  end;
                2:
                  begin
                    accidx:=3;
                    secondidx:=1;
                  end;
                3:
                  begin
                    accidx:=1;
                    secondidx:=2;
                  end
                else
                  internalerror(2014041301);
              end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_ref_reg_reg(fmaop[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                paraarray[memop].location.reference,paraarray[secondidx].location.register,location.register);
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as
                destination, so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=1;
                  firstidx:=3;
                  secondidx:=2;
                  opidx:=3;
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=2;
                  firstidx:=3;
                  secondidx:=1;
                  opidx:=3;
                end
              else
                begin
                  accidx:=3;
                  firstidx:=1;
                  secondidx:=2;
                  opidx:=0;
                end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_reg_reg_reg(fmaop[negproduct,negop3,tfloatdef(resultdef).floattype,opidx],S_NO,
                paraarray[firstidx].location.register,paraarray[secondidx].location.register,location.register);
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  { Generates code for the fractional part of the operand in left as
    value minus round(value) with rounding immediate 3, or with a single
    VREDUCESS/VREDUCESD when AVX512DQ is available. Only the vector fpu is
    supported: internal error 2017052101 is raised otherwise. }
  procedure tx86inlinenode.second_frac_real;
    var
      truncreg : TRegister;
      roundop,
      subop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101);

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        case tfloatdef(left.resultdef).floattype of
          s32real:
            begin
{$ifndef i8086}
              if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,left.location.register,left.location.register,location.register))
              else
{$endif not i8086}
                begin
                  { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
                end;
            end;
          s64real:
            begin
{$ifndef i8086}
              if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,left.location.register,left.location.register,location.register))
              else
{$endif not i8086}
                begin
                  { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
                end;
            end;
          else
            internalerror(2017052102);
        end
      else
        begin
          { two operand form: the rounded value goes to a scratch register and
            is subtracted from a copy of the operand held in the result register }
          truncreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          case tfloatdef(left.resultdef).floattype of
            s32real:
              begin
                roundop:=A_ROUNDSS;
                subop:=A_SUBSS;
              end;
            s64real:
              begin
                roundop:=A_ROUNDSD;
                subop:=A_SUBSD;
              end;
            else
              internalerror(2017052103);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,truncreg));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(subop,S_NO,truncreg,location.register));
        end;
      { the computation was done in the operand's float type; convert if the
        result type differs }
      if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
        hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
    end;
  { Generates code for int(x) on the vector fpu by emitting
    (V)ROUNDSS/(V)ROUNDSD with rounding immediate 3. Internal error
    2017052107 is raised when the vector fpu cannot be used for the result
    type. }
  procedure tx86inlinenode.second_int_real;
    var
      roundop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107);

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              roundop:=A_VROUNDSS;
            s64real:
              roundop:=A_VROUNDSD;
            else
              internalerror(2017052105);
          end;
          { using left.location.register here as 3rd parameter is crucial to break dependency chains }
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
        end
      else
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              roundop:=A_ROUNDSS;
            s64real:
              roundop:=A_ROUNDSD;
            else
              internalerror(2017052106);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,location.register));
        end;
    end;
  { Generates code for high() of a dynamic array without a separate nil
    test: for a nil array the result is -1, otherwise the value stored at
    offset -ossinttype.size from the array pointer is loaded. Internal error
    2019122809 is raised when left is not a dynamic array. }
  procedure tx86inlinenode.second_high;
    var
      nildone : tasmlabel;
      resreg : tregister;
      fieldref : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122809);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(nildone);
      { Subtracting 1 from the array pointer turns nil into -1, which is exactly
        the result needed for a nil array, and sets the carry flag only in that
        case, so the load below can be skipped depending on it.
        The instruction is emitted directly instead of going through the cg
        because the cg might emit a dec instead of a sub, and dec does not touch
        the carry flag. }
      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,nildone);
      { the pointer was decreased by one above, so the offset is corrected by +1;
        the reference is created without any volatility flags }
      hlcg.reference_reset_base(fieldref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
      { reuse the register that held the array pointer for the result, so the
        -1 produced for a nil array is kept when the load is skipped }
      resreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,fieldref,resreg);
      cg.a_label(current_asmdata.CurrAsmList,nildone);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resreg;
    end;
  { Generates a scalar (V)MINSS/(V)MINSD/(V)MAXSS/(V)MAXSD for the two
    parameters. At most one parameter is used as memory operand. Internal
    error 2020120503 is raised when the result type is not handled (always
    on i8086; on i386 also when the selected fpu type is too low). }
  procedure tx86inlinenode.second_minmax;
    const
      { indexed by: is max, use AVX, float type }
      minmaxop : array[false..true,false..true,s32real..s64real] of TAsmOp =
        (
          (
            (A_MINSS,A_MINSD),
            (A_VMINSS,A_VMINSD)
          ),
          (
            (A_MAXSS,A_MAXSD),
            (A_VMAXSS,A_VMAXSD)
          )
        );
    var
      paraarray : array[1..2] of tnode;
      memop,
      regop,
      i : integer;
      gotmem : boolean;
      asmop: TAsmOp;
    begin
{$ifndef i8086}
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        is_single(resultdef) or is_double(resultdef)
{$endif i386}
      then
        begin
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=1 to 2 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed }
          gotmem:=false;
          memop:=0;
          for i:=1 to 2 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if not(gotmem) and (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  begin
                    memop:=i;
                    gotmem:=true;
                  end
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
              end;

          { min/max always load the second operand (which must be the else
            assignment) into the destination if one of the operands is a NaN, so
            the operands cannot be swapped to save a move unless fastmath is on:
            a first parameter in memory is loaded into a register instead }
          if gotmem and (memop=1) and not(cs_opt_fastmath in current_settings.optimizerswitches) then
            begin
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
              gotmem:=false;
            end;

          asmop:=minmaxop[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              if (memop<1) or (memop>2) then
                begin
                  if UseAVX then
                    internalerror(2020120504)
                  else
                    internalerror(2020120601);
                end;
              { index of the parameter that is not the memory operand }
              regop:=3-memop;
              if UseAVX then
                emit_ref_reg_reg(asmop,S_NO,
                  paraarray[memop].location.reference,paraarray[regop].location.register,location.register)
              else
                begin
                  { two operand form: copy the register parameter into the result
                    register and combine it with the memory parameter }
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[regop].resultdef,resultdef,
                    paraarray[regop].location.register,location.register,mms_movescalar);
                  emit_ref_reg(asmop,S_NO,
                    paraarray[memop].location.reference,location.register);
                end;
            end
          else if UseAVX then
            emit_reg_reg_reg(asmop,S_NO,
              paraarray[2].location.register,paraarray[1].location.register,location.register)
          else
            begin
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                paraarray[1].location.register,location.register,mms_movescalar);
              emit_reg_reg(asmop,S_NO,
                paraarray[2].location.register,location.register)
            end;
        end
      else
{$endif i8086}
        internalerror(2020120503);
    end;
  1433. end.