nx86inl.pas 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  type
    { x86 flavour of the code-generator inline node (descends from
      tcginlinenode).  It overrides the type-check, first-pass and
      second-pass hooks listed below. }
    tx86inlinenode = class(tcginlinenode)
    protected
      { hook invoked from first_round_real, first_trunc_real and
        first_frac_real; the implementation in this unit is empty and,
        per its own comment, only matters for x86_64 }
      procedure maybe_remove_round_trunc_typeconv; virtual;
    public
      { assigns the result type of the x86-only intrinsics (port I/O,
        cli/sti, segment register reads); everything else is inherited }
      function pass_typecheck_cpu:tnode;override;
      { first pass override
        so that the code generator will actually generate
        these nodes.
      }
      function first_cpu: tnode;override;
      function first_pi: tnode ; override;
      function first_arctan_real: tnode; override;
      function first_abs_real: tnode; override;
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_ln_real: tnode; override;
      function first_cos_real: tnode; override;
      function first_sin_real: tnode; override;
      function first_round_real: tnode; override;
      function first_trunc_real: tnode; override;
      function first_popcnt: tnode; override;
      function first_fma: tnode; override;
      function first_frac_real : tnode; override;
      function first_int_real : tnode; override;
      { drops an implicit float type conversion below in_int_real when
        the SSE4.1 path can be used; otherwise defers to the ancestor }
      function simplify(forinline : boolean) : tnode; override;
      { second pass override to generate these nodes }
      procedure pass_generate_code_cpu;override;
      procedure second_IncludeExclude;override;
      procedure second_pi; override;
      procedure second_arctan_real; override;
      procedure second_abs_real; override;
      procedure second_round_real; override;
      procedure second_sqr_real; override;
      procedure second_sqrt_real; override;
      procedure second_ln_real; override;
      procedure second_cos_real; override;
      procedure second_sin_real; override;
      procedure second_trunc_real; override;
      procedure second_prefetch;override;
      procedure second_abs_long;override;
      procedure second_popcnt;override;
      procedure second_fma;override;
      procedure second_frac_real;override;
      procedure second_int_real;override;
      procedure second_high;override;
    private
      { runs the second pass on lnode and brings its value into an FPU
        register location (see the implementation for the per-location
        handling) }
      procedure load_fpu_location(lnode: tnode);
    end;
  71. implementation
  72. uses
  73. systems,
  74. globtype,globals,
  75. verbose,compinnr,fmodule,
  76. defutil,
  77. aasmbase,aasmdata,aasmcpu,
  78. symconst,symtype,symdef,symcpu,
  79. ncnv,
  80. htypechk,
  81. cgbase,pass_1,pass_2,
  82. cpuinfo,cpubase,nutils,
  83. ncal,ncgutil,nld,ncon,
  84. tgobj,
  85. cga,cgutils,cgx86,cgobj,hlcgobj;
  86. {*****************************************************************************
  87. TX86INLINENODE
  88. *****************************************************************************}
  { Hook for the round/trunc/frac first-pass handlers.  Deliberately a
    no-op at this level: per the original note it only makes a difference
    for x86_64. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { nothing to do here }
    end;
  { Type-checks the x86-specific intrinsics handled by this node.
    Returns nil for the intrinsics listed here (their resultdef is set in
    place); any intrinsic not covered by this case statement, or by the
    generated include, is passed to the inherited implementation. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads: one parameter, result sized after the port width }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;
        { port writes: two parameters, no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;
        { segment register reads: 16 bit result on i8086, 32 bit otherwise }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}
        { include automatically generated code; it is spliced in here and
          so has to consist of further case branches }
{$i x86mmtype.inc}
        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86-specific intrinsics: records the expected
    location of the result.  Returns nil for the intrinsics listed here;
    anything not covered by this case statement or by the generated
    include is passed to the inherited implementation. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics that yield a value }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;
        { intrinsics without a result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;
        { include automatically generated code; it is spliced in here and
          so has to consist of further case branches }
{$i x86mmfirst.inc}
        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for pi: handled inline (result in an FPU register) only
    when the best real type is the 80 bit extended type; otherwise the
    inherited implementation decides. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_pi;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for arctan: handled inline (result in an FPU register)
    when the best real type is the 80 bit extended type and, on i8086,
    the target CPU is at least a 386; otherwise the inherited
    implementation is used. }
  function tx86inlinenode.first_arctan_real : tnode;
    var
      use_fpatan : boolean;
    begin
      use_fpatan:=tfloatdef(pbestrealtype^).floattype=s80real;
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype < cpu_386 then
        use_fpatan:=false;
{$endif i8086}
      if use_fpatan then
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end
      else
        Result:=inherited first_arctan_real;
    end;
  { First pass for abs on a real: always handled inline.  The result is
    expected in an MM register when use_vectorfpu(resultdef) holds and in
    an FPU register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqr on a real: always handled inline.  The result is
    expected in an MM register when use_vectorfpu(resultdef) holds and in
    an FPU register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqrt on a real: always handled inline.  The result is
    expected in an MM register when use_vectorfpu(resultdef) holds and in
    an FPU register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for ln: handled inline (result in an FPU register) only
    when the best real type is the 80 bit extended type; otherwise the
    inherited implementation decides. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_ln_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for cos: handled inline (result in an FPU register) when
    the best real type is the 80 bit extended type and, on i8086, the
    target CPU is at least a 386; otherwise the inherited implementation
    is used. }
  function tx86inlinenode.first_cos_real : tnode;
    var
      use_fcos : boolean;
    begin
      use_fcos:=tfloatdef(pbestrealtype^).floattype=s80real;
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        use_fcos:=false;
{$endif i8086}
      if use_fcos then
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end
      else
        Result:=inherited first_cos_real;
    end;
  { First pass for sin: handled inline (result in an FPU register) when
    the best real type is the 80 bit extended type and, on i8086, the
    target CPU is at least a 386; otherwise the inherited implementation
    is used. }
  function tx86inlinenode.first_sin_real : tnode;
    var
      use_fsin : boolean;
    begin
      use_fsin:=tfloatdef(pbestrealtype^).floattype=s80real;
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        use_fsin:=false;
{$endif i8086}
      if use_fsin then
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end
      else
        Result:=inherited first_sin_real;
    end;
  { First pass for round: always handled inline.  The result is expected
    in memory, except on x86_64 with a vector-FPU operand, where it is
    expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      Result:=nil;
      { must run before left.resultdef is inspected below }
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for trunc.  When optimizing for size (and, on x86_64, the
    operand is not handled by the vector FPU) the inherited implementation
    is used.  Otherwise trunc is generated inline: the result is expected
    in memory, or in an integer register for a vector-FPU operand on
    x86_64. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      { must run before left.resultdef is inspected below }
      maybe_remove_round_trunc_typeconv;
      if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
        and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
        then
        begin
          Result:=inherited first_trunc_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for popcnt.  Generated inline (result in an integer
    register) when the selected CPU type advertises CPUX86_HAS_POPCNT;
    on i386 64 bit operands are excluded, and on i8086 the inline form is
    never used.  All other cases go to the inherited implementation. }
  function tx86inlinenode.first_popcnt: tnode;
    var
      use_popcnt : boolean;
    begin
{$ifdef i8086}
      use_popcnt:=false;
{$else i8086}
      use_popcnt:=CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype];
{$ifdef i386}
      use_popcnt:=use_popcnt and not is_64bit(left.resultdef);
{$endif i386}
{$endif i8086}
      if use_popcnt then
        begin
          Result:=nil;
          expectloc:=LOC_REGISTER;
        end
      else
        Result:=inherited first_popcnt;
    end;
  { First pass for fma.  Generated inline (result in an MM register) when
    the selected CPU type has FMA or FMA4 and the result is single or
    double; never on i8086.  Everything else goes to the inherited
    implementation. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=nil;
          expectloc:=LOC_MMREGISTER;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_fma;
    end;
  { First pass for frac.  Generated inline (result in an MM register)
    when the FPU type is at least SSE4.1 and the result is single or
    double; otherwise the inherited implementation is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for int.  Generated inline (result in an MM register)
    when the FPU type is at least SSE4.1 and the result is single or
    double; otherwise the inherited implementation is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { Simplification hook.  For in_int_real on an SSE4.1 (or better) FPU
    whose operand is a non-explicit type conversion of a single or double
    value, the conversion is stripped and a re-typechecked copy of this
    node, typed after the unconverted operand, is returned.  In every
    other case the inherited simplify is used. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      operand : tnode;
    begin
      { the order of the tests matters: left is only treated as a
        ttypeconvnode after its node type has been checked }
      if not(
           (current_settings.fputype>=fpu_sse41) and
           (inlinenumber=in_int_real) and
           (left.nodetype=typeconvn) and
           not(nf_explicit in left.flags) and
           (ttypeconvnode(left).left.resultdef.typ=floatdef) and
           (is_double(ttypeconvnode(left).left.resultdef) or
            is_single(ttypeconvnode(left).left.resultdef))
         ) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;
      { get rid of the type conversion: detach its operand first so that
        freeing the conversion node does not free the operand too }
      operand:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=operand;
      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=operand.resultdef;
      typecheckpass(Result);
    end;
  { Second pass for the x86-specific intrinsics: port I/O, cli/sti and
    segment register reads are generated here; further branches come from
    the generated include x86mmsecond.inc, and everything else is passed
    to the inherited implementation.
    NOTE(review): paraarray, i, op and the helpers GetConstInt,
    GetParameters, location_force_mmxreg and location_make_ref are not
    used by the code visible in this procedure; presumably they are used
    by x86mmsecond.inc - confirm before removing any of them. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN instruction reading from the port given by the node's
      single parameter into dreg, then copies dreg into a freshly
      allocated integer register that becomes the node's location. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnumber: tnode;
      begin
        portnumber:=left;
        secondpass(portnumber);
        if (portnumber.location.loc=LOC_CONSTANT) and
           (portnumber.location.value>=0) and
           (portnumber.location.value<=255) then
          begin
            { constant port number in 0..255: use the immediate form }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnumber.location.value,dreg));
            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
            location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
            hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
          end
        else
          begin
            { otherwise the port number is loaded into DX first }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
            location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
            hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
          end;
      end;

    { Emits an OUT instruction.  The data value is taken from the first
      node of the parameter chain and the port number from the node that
      follows it; the data is loaded into dreg before the instruction. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnumber, portdata: tnode;
      begin
        portnumber:=tcallparanode(tcallparanode(left).right).left;
        portdata:=tcallparanode(left).left;
        secondpass(portdata);
        secondpass(portnumber);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portdata.resultdef,dtype,portdata.location,dreg);
        if (portnumber.location.loc=LOC_CONSTANT) and
           (portnumber.location.value>=0) and
           (portnumber.location.value<=255) then
          { constant port number in 0..255: use the immediate form }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnumber.location.value))
        else
          begin
            { otherwise the port number is loaded into DX first }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Moves the given segment register into a new integer register, which
      becomes the node's location. }
    procedure get_segreg(segreg:tregister);
      begin
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
      end;

    { Returns the signed value of an ordinal constant node; reports
      type_e_constant_expr_expected and returns 0 for any other node. }
    function GetConstInt(n: tnode): longint;
      begin
        Result:=0;
        if is_constintnode(n) then
          result:=tordconstnode(n).value.svalue
        else
          Message(type_e_constant_expr_expected);
      end;

    { Fills paraarray[1..count] from the node's parameters.  The chain
      starting at left is walked while the index counts down, so the
      first node of the chain lands in paraarray[count].  A single
      parameter that is not wrapped in a tcallparanode is stored as is. }
    procedure GetParameters(count: longint);
      var
        i: longint;
        p: tnode;
      begin
        if (count=1) and
           (not (left is tcallparanode)) then
          paraarray[1]:=left
        else
          begin
            p:=left;
            for i := count downto 1 do
              begin
                paraarray[i]:=tcallparanode(p).paravalue;
                p:=tcallparanode(p).nextpara;
              end;
          end;
      end;

    { Makes sure l lives in an MMX register.  A LOC_CMMXREGISTER is left
      alone only when maybeconst is true; in all other cases the value is
      loaded into a newly allocated MMX register. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        reg : tregister;
      begin
        if (l.loc<>LOC_MMXREGISTER) and
           ((l.loc<>LOC_CMMXREGISTER) or (not maybeconst)) then
          begin
            reg:=tcgx86(cg).getmmxregister(list);
            cg.a_loadmm_loc_reg(list,OS_M64,l,reg,nil);
            location_freetemp(list,l);
            location_reset(l,LOC_MMXREGISTER,OS_M64);
            l.register:=reg;
          end;
      end;

    { Turns loc into a memory reference.  A register location becomes a
      reference that uses that register as its base; existing references
      are kept; any other location is first forced into a 32 bit
      unsigned register. }
    procedure location_make_ref(var loc: tlocation);
      var
        hloc: tlocation;
      begin
        case loc.loc of
          LOC_CREGISTER,
          LOC_REGISTER:
            begin
              location_reset_ref(hloc, LOC_REFERENCE, OS_32, 1, []);
              hloc.reference.base:=loc.register;
              loc:=hloc;
            end;
          LOC_CREFERENCE,
          LOC_REFERENCE:
            begin
            end;
          else
            begin
              hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
              location_reset_ref(hloc, LOC_REFERENCE, OS_32, 1, []);
              hloc.reference.base:=loc.register;
              loc:=hloc;
            end;
        end;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);
        { generated code, spliced in as further case branches }
{$i x86mmsecond.inc}
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Second pass for pi: emits FLDPI and reports the result in the FPU
    result register. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      { FLDPI pushed a value, keep the code generator's stack count in sync }
      tcgx86(cg).inc_fpu_stack;
    end;
  { Runs the second pass on lnode and loads its value into an FPU
    register.  This node's location is set to NR_FPU_RESULT_REG, except
    for an operand in an MM register: there the operand's location is
    taken over and forced into an FPU register.  Any operand location not
    handled below is an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      if lnode.location.loc=LOC_CFPUREGISTER then
        cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.register,location.register)
      else if lnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.reference,location.register)
      else if lnode.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          location:=lnode.location;
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
        end
      { an operand that is already in LOC_FPUREGISTER needs no code }
      else if lnode.location.loc<>LOC_FPUREGISTER then
        internalerror(309991);
    end;
  { Second pass for arctan: loads the operand into the FPU, pushes 1.0
    with FLD1 and emits FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { second FPATAN operand }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Second pass for abs on a real.  On the vector FPU the operand is
    ANDed with the FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE constant (three
    operand form into a new register with AVX, in place otherwise);
    on the x87 FABS is emitted. }
  procedure tx86inlinenode.second_abs_real;

    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    { References the mask constant called maskname (with the target's C
      prefix) and emits the AND of the operand with it, using avx_op in
      AVX mode and sse_op otherwise. }
    procedure emit_mask_and(const maskname: string;sse_op,avx_op: TAsmOp);
      var
        maskref : treference;
        masksym : tasmsymbol;
      begin
        masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
        reference_reset_symbol(maskref,masksym,0,4,[]);
        current_module.add_extern_asmsym(masksym);
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
        if UseAVX then
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
            avx_op,S_XMM,maskref,left.location.register,location.register))
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(
            sse_op,S_XMM,maskref,location.register));
      end;

    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          { the three operand form writes into a separate register }
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        { the two operand form works in place on the operand's register }
        location:=left.location;

      case tfloatdef(resultdef).floattype of
        s32real:
          emit_mask_and('FPC_ABSMASK_SINGLE',A_ANDPS,A_VANDPS);
        s64real:
          emit_mask_and('FPC_ABSMASK_DOUBLE',A_ANDPD,A_VANDPD);
        else
          internalerror(200506081);
      end;
    end;
  { Second pass for round.  On x86_64 a vector-FPU operand is converted
    with (V)CVTSS2SI / (V)CVTSD2SI into a 64 bit integer register.  In all
    other cases the operand is loaded into the x87 FPU and stored to a
    temp with FISTP. }
  procedure tx86inlinenode.second_round_real;
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_NO,left.location.register,location.register));
              end;
            OS_F64:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_NO,left.location.register,location.register));
              end;
            else
              internalerror(2007031402);
          end;
        end
      else
{$endif x86_64}
        begin
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTP,S_IQ,location.reference);
          { FISTP popped the operand }
          tcgx86(cg).dec_fpu_stack;
          emit_none(A_FWAIT,S_NO);
        end;
    end;
  { Second pass for trunc.  Three code paths:
      - x86_64 with a vector-FPU operand: (V)CVTTSS2SI / (V)CVTTSD2SI into
        a 64 bit integer register;
      - FPU type >= SSE3: load the operand into the x87 FPU and store it
        with FISTTP;
      - otherwise: temporarily change the x87 control word, store with
        FISTP and restore the control word afterwards. }
  procedure tx86inlinenode.second_trunc_real;
    var
      oldcw,newcw : treference;
    begin
{$ifdef x86_64}
      { NOTE(review): left.location is inspected here before
        secondpass(left) has been run in this procedure - confirm that
        this is intended }
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          if UseAVX then
            case left.location.size of
              OS_F32:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register));
              OS_F64:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register));
              else
                internalerror(2007031401);
            end
          else
            case left.location.size of
              OS_F32:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
              OS_F64:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
              else
                internalerror(2007031401);
            end;
        end
      else
{$endif x86_64}
        begin
          if (current_settings.fputype>=fpu_sse3) then
            begin
              { FISTTP truncates regardless of the control word, so no
                control word juggling is needed on this path }
              load_fpu_location(left);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
            end
          else
            begin
              { two 2-byte temps: the control word to restore and the
                modified copy to load }
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
{$ifdef i8086}
              if current_settings.cputype<=cpu_286 then
                begin
                  { up to the 286 the waiting FSTCW form plus an explicit
                    FWAIT is emitted }
                  emit_ref(A_FSTCW,S_NO,newcw);
                  emit_ref(A_FSTCW,S_NO,oldcw);
                  emit_none(A_FWAIT,S_NO);
                end
              else
{$endif i8086}
                begin
                  emit_ref(A_FNSTCW,S_NO,newcw);
                  emit_ref(A_FNSTCW,S_NO,oldcw);
                end;
              { set bits 8..11 of the copy: in the x87 control word these
                are the precision control and rounding control fields, so
                this selects extended precision and round-toward-zero }
              emit_const_ref(A_OR,S_W,$0f00,newcw);
              load_fpu_location(left);
              emit_ref(A_FLDCW,S_NO,newcw);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
              { restore the original control word }
              emit_ref(A_FLDCW,S_NO,oldcw);
              emit_none(A_FWAIT,S_NO);
              tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
              tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
            end;
        end;
    end;
  { Second pass for sqr on a real.  On the vector FPU the operand is
    multiplied by itself into a newly allocated MM register (one three
    operand multiply with AVX, a load followed by a two operand multiply
    otherwise); on the x87 FMUL ST0,ST0 is emitted. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      { AVX always needs the operand in an MM register; the SSE path only
        has to move it there when it currently sits in an FPU register }
      if UseAVX or (left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER]) then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

      if UseAVX then
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
          left.location.register,left.location.register,location.register,mms_movescalar)
      else
        begin
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,
            left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            location.register,location.register,mms_movescalar);
        end;
    end;
  { Second pass for sqrt on a real.  On the vector FPU (V)SQRTSS or
    (V)SQRTSD is emitted into a newly allocated MM register, depending on
    the result's float type; on the x87 FSQRT is emitted. }
  procedure tx86inlinenode.second_sqrt_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      { we use S_NO instead of S_XMM below, regardless of the register size,
        as the size of the memory location is 32/64 bit;
        in the AVX forms, using left.location.register as 2nd parameter is
        crucial to break dependency chains }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
          end;
        s64real:
          begin
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
          end;
        else
          internalerror(200510031);
      end;
    end;
  procedure tx86inlinenode.second_ln_real;
    { Emits the x87 sequence for ln(): FLDLN2, FXCH, FYL2X, executed after the
      operand has been loaded through load_fpu_location. }
    begin
      { load_fpu_location is defined elsewhere; presumably it leaves the
        operand in st(0) - TODO confirm }
      load_fpu_location(left);

      { push the constant ln(2) ... }
      emit_none(A_FLDLN2,S_NO);
      { ... swap it below the operand ... }
      emit_none(A_FXCH,S_NO);
      { ... and let FYL2X combine both (st(1)*log2(st(0))) }
      emit_none(A_FYL2X,S_NO);
    end;
  procedure tx86inlinenode.second_cos_real;
    { Emits FCOS for cos(); on i8086 targets older than the 386 setting the
      inherited implementation is used instead. }
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  procedure tx86inlinenode.second_sin_real;
    { Emits FSIN for sin(); on i8086 targets older than the 386 setting the
      inherited implementation is used instead. }
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
        end
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  procedure tx86inlinenode.second_prefetch;
    { Emits PREFETCHNTA for the address of the operand when the operand ends
      up in a memory location; any other location produces no code.
      On i386/i8086 nothing at all is generated (the operand is not even
      evaluated) when the selected cpu type is below Pentium 3. }
    var
      addrref : treference;
      addrreg : tregister;
      had_checkpointer : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype<cpu_Pentium3 then
        exit;
{$endif i386 or i8086}

      { do not call Checkpointer for left node }
      had_checkpointer:=(cs_checkpointer in current_settings.localswitches);
      if had_checkpointer then
        node_change_local_switch(left,cs_checkpointer,false);
      secondpass(left);
      { NOTE(review): this second call passes false again, exactly like the
        call before secondpass; if the intention was to restore the switch on
        the node it would presumably have to pass true - confirm before
        changing, behaviour is kept as found }
      if had_checkpointer then
        node_change_local_switch(left,cs_checkpointer,false);

      if left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
        begin
          { materialise the address in a register and prefetch through a
            plain base-register reference with offset 0 }
          addrreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
          cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,addrreg);
          reference_reset_base(addrref,addrreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,addrref));
        end;
      { else: nothing to prefetch }
    end;
  procedure tx86inlinenode.second_abs_long;
    { Emits branch-free code for abs() of an integer operand.
      Without CMOV (only possible on i8086/i386):
        mask:=x sar (bits-1); result:=(x xor mask)-mask
      With CMOV:
        result:=x; tmp:=-x; if the negation is not negative then result:=tmp }
    var
      negated : tregister;
      valsize : tcgsize;
      cmov : taicpu;
    begin
      valsize:=def_cgsize(left.resultdef);
      secondpass(left);

{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          { the operand register is overwritten by the SAR below, hence the
            last argument of location_force_reg is false here }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,valsize,valsize,left.location.register,location.register);
          { turn the operand register into the sign mask }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,valsize,tcgsize2size[valsize]*8-1,left.location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,valsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,valsize,left.location.register,location.register);
          exit;
        end;
{$endif i8086 or i386}

      { CMOV variant: the operand register itself is only read }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      negated:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,valsize,valsize,left.location.register,negated);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,valsize,valsize,left.location.register,location.register);
      emit_reg(A_NEG,tcgsize2opsize[valsize],negated);
      { the conditional move uses the flags left behind by NEG }
      cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[valsize],negated,location.register);
      cmov.condition:=C_NS;
      current_asmdata.CurrAsmList.concat(cmov);
    end;
  906. {*****************************************************************************
  907. INCLUDE/EXCLUDE GENERIC HANDLING
  908. *****************************************************************************}
  procedure tx86inlinenode.second_IncludeExclude;
    { Emits code for include(s,e) / exclude(s,e).
      A constant element is turned into an OR (include) or AND-NOT (exclude)
      with a bit mask; a variable element uses BTS (include) or BTR (exclude).
      On i8086 below the 386 setting the inherited implementation is used. }
    var
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
      setnode,
      elemnode : tnode;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { small sets are handled in their own size, everything else in 32 bit chunks }
      if not is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end
      else
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size)
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;
      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber<>in_include_x_y then
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end
          else
            cgop:=OP_OR;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { address the chunk of the set which contains the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,setnode.location.size,mask,setnode.location.register);
            else
              internalerror(200405022);
          end;
          exit;
        end;

      { variable element: the 8 bit case is carried out as a 32 bit operation }
      orgsize:=opsize;
      if opsize in [OS_8,OS_S8] then
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      { determine asm operator }
      if inlinenumber<>in_include_x_y then
        asmop:=A_BTR
      else
        asmop:=A_BTS;
      hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
      register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
      bitreg:=elemnode.location.register;
      if (setnode.location.loc<>LOC_REFERENCE) then
        begin
          { second argument can't be an 8 bit register either }
          setreg:=setnode.location.register;
          if (orgsize in [OS_8,OS_S8]) then
            setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
          emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
        end
      else
        emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference);
    end;
  procedure tx86inlinenode.second_popcnt;
    { Emits a POPCNT instruction for the operand; the result is placed in a
      new integer register of the (unsigned) operation size. }
    var
      cntsize : tcgsize;
    begin
      secondpass(left);

      cntsize:=tcgsize2unsigned[left.location.size];
      { no 8 Bit popcont }
      if cntsize=OS_8 then
        cntsize:=OS_16;

      { the operand can be used as it is only if it already is a register or
        a memory location of exactly the operation size }
      if not((left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) and
             (left.location.size=cntsize)) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);

      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);

      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
        emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register)
      else
        emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
    end;
  procedure tx86inlinenode.second_fma;
    { Emits a scalar fused multiply-add for fma(a,b,c) = a*b+c.
      Unary minus nodes directly on the arguments are folded into the choice
      of the instruction (VFMADD/VFMSUB/VFNMADD/VFNMSUB).  At most one of the
      three arguments is used as memory operand.
      Raises internalerror(2014032301) when the selected cpu offers neither
      FMA nor FMA4 (always on i8086). }
    const
      { indexed by [product negated, third operand negated, float type, form];
        form 1..3 is the number of the argument used as memory operand,
        form 0 and 3 are also used by the register-only variants below }
      fmaop : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
         { positive product }
         (
          { positive third operand }
          ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
           (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
          ),
          { negative third operand }
          ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
           (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
          )
         ),
         { negative product }
         (
          { positive third operand }
          ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
           (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
          ),
          { negative third operand }
          ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
           (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
          )
         )
        );
    var
      args : array[1..3] of tnode;
      memidx,               { argument used as memory operand, 0 if none }
      accidx,               { argument copied into the result register first }
      firstidx,             { argument used as first instruction operand }
      secondidx,            { argument used as second instruction operand }
      form,                 { last index into fmaop }
      i : integer;
      negaddend,
      negproduct : boolean;
    begin
{$ifndef i8086}
      if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
        begin
          negaddend:=false;
          negproduct:=false;
          { the parameter list holds the last argument first }
          args[3]:=tcallparanode(parameters).paravalue;
          args[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          args[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the skipped unary minus nodes are not released here, they are kept
            and released together with the other nodes, only no code is
            generated for them }
          for i:=1 to 2 do
            if args[i].nodetype=unaryminusn then
              begin
                { each negated factor flips the sign of the product }
                args[i]:=tunarynode(args[i]).left;
                negproduct:=not(negproduct);
              end;
          if args[3].nodetype=unaryminusn then
            begin
              args[3]:=tunarynode(args[3]).left;
              negaddend:=true;
            end;

          for i:=1 to 3 do
            secondpass(args[i]);

          { only one memory operand is allowed: the first argument found in
            memory is used as such, every other argument which is not in an
            mm register yet is loaded into one }
          memidx:=0;
          for i:=1 to 3 do
            begin
              if args[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
                continue;
              if (memidx<>0) or not(args[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,args[i].location,args[i].resultdef,true)
              else
                memidx:=i;
            end;

          location_reset(location,LOC_MMREGISTER,args[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if memidx<>0 then
            begin
              { decide which argument is copied into the result register and
                which one becomes the register source operand }
              case memidx of
                1:
                  begin
                    accidx:=3;
                    secondidx:=2;
                  end;
                2:
                  begin
                    accidx:=3;
                    secondidx:=1;
                  end;
                3:
                  begin
                    accidx:=1;
                    secondidx:=2;
                  end;
                else
                  internalerror(2014041301);
              end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[accidx].resultdef,resultdef,
                args[accidx].location.register,location.register,mms_movescalar);
              emit_ref_reg_reg(fmaop[negproduct,negaddend,tfloatdef(resultdef).floattype,memidx],S_NO,
                args[memidx].location.reference,args[secondidx].location.register,location.register);
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if args[1].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=1;
                  form:=3;
                  firstidx:=3;
                  secondidx:=2;
                end
              else if args[2].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=2;
                  form:=3;
                  firstidx:=3;
                  secondidx:=1;
                end
              else
                begin
                  accidx:=3;
                  form:=0;
                  firstidx:=1;
                  secondidx:=2;
                end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[accidx].resultdef,resultdef,
                args[accidx].location.register,location.register,mms_movescalar);
              emit_reg_reg_reg(fmaop[negproduct,negaddend,tfloatdef(resultdef).floattype,form],S_NO,
                args[firstidx].location.register,args[secondidx].location.register,location.register);
            end;
          exit;
        end;
{$endif i8086}
      internalerror(2014032301);
    end;
  procedure tx86inlinenode.second_frac_real;
    { Emits code for frac(x) as x minus x rounded with rounding-control
      immediate 3, using the scalar SSE4.1/AVX ROUND* and SUB* instructions.
      Only the vector fpu is supported, anything else raises
      internalerror(2017052101). }
    var
      truncreg : TRegister;
      roundop,
      subop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101);

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      if UseAVX then
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              begin
                roundop:=A_VROUNDSS;
                subop:=A_VSUBSS;
              end;
            s64real:
              begin
                roundop:=A_VROUNDSD;
                subop:=A_VSUBSD;
              end;
            else
              internalerror(2017052102);
          end;
          { using left.location.register here as 3rd parameter is crucial to break dependency chains }
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
          { result:=operand-rounded value }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(subop,S_NO,location.register,left.location.register,location.register));
        end
      else
        begin
          { two operand form: the rounded value goes into an extra register
            and is subtracted from a copy of the operand }
          truncreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          case tfloatdef(resultdef).floattype of
            s32real:
              begin
                roundop:=A_ROUNDSS;
                subop:=A_SUBSS;
              end;
            s64real:
              begin
                roundop:=A_ROUNDSD;
                subop:=A_SUBSD;
              end;
            else
              internalerror(2017052103);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,truncreg));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(subop,S_NO,truncreg,location.register));
        end;
    end;
  procedure tx86inlinenode.second_int_real;
    { Emits code for int(x): a scalar SSE4.1/AVX ROUND* instruction with
      rounding-control immediate 3.  Only the vector fpu is supported,
      anything else raises internalerror(2017052107). }
    var
      roundop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107);

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      if UseAVX then
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              roundop:=A_VROUNDSS;
            s64real:
              roundop:=A_VROUNDSD;
            else
              internalerror(2017052105);
          end;
          { using left.location.register here as 3rd parameter is crucial to break dependency chains }
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
        end
      else
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              roundop:=A_ROUNDSS;
            s64real:
              roundop:=A_ROUNDSD;
            else
              internalerror(2017052106);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,location.register));
        end;
    end;
  procedure tx86inlinenode.second_high;
    { Emits code for high() of a dynamic array without a separate nil test:
      the array pointer is decremented by one, which yields -1 and a set carry
      flag for a nil array; otherwise the value is loaded from memory relative
      to the decremented pointer.
      Raises Internalerror(2019122801) for anything but a dynamic array. }
    var
      nil_label : tasmlabel;
      resreg : tregister;
      highref : treference;
      ressize : tcgsize;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122801);
      ressize:=def_cgsize(resultdef);

      { the array pointer is needed in a register which may be modified }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(nil_label);

      { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
        flag is set in this case, so we can jump depending on it

        do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
        we are using is not working as dec does not touch the carry flag }
      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,nil_label);

      { the value is read ossinttype.size bytes in front of the array data;
        the +1 compensates for the decrement of the pointer above.
        The reference is set up with an empty volatility set. }
      hlcg.reference_reset_base(highref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);

      { reuse the register which held the array pointer for the result, so
        that the -1 computed above is already in place for a nil array }
      resreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,ressize);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,highref,resreg);
      cg.a_label(current_asmdata.CurrAsmList,nil_label);

      location_reset(location,LOC_REGISTER,ressize);
      location.register:=resreg;
    end;
  1278. end.