nx86inl.pas 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  type
    { x86 variant of the code-generator inline node.  It declares the
      CPU-specific type-check, first-pass and second-pass overrides that are
      implemented further down in this unit. }
    tx86inlinenode = class(tcginlinenode)
    protected
      { Hook called from the round/trunc/frac first-pass handlers. }
      procedure maybe_remove_round_trunc_typeconv; virtual;
    public
      function pass_typecheck_cpu:tnode;override;

      { First-pass overrides, so that the code generator will actually
        generate these nodes. }
      function first_cpu: tnode;override;
      function first_pi: tnode ; override;
      function first_arctan_real: tnode; override;
      function first_abs_real: tnode; override;
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_ln_real: tnode; override;
      function first_cos_real: tnode; override;
      function first_sin_real: tnode; override;
      function first_round_real: tnode; override;
      function first_trunc_real: tnode; override;
      function first_popcnt: tnode; override;
      function first_fma: tnode; override;
      function first_frac_real : tnode; override;
      function first_int_real : tnode; override;
      function simplify(forinline : boolean) : tnode; override;

      { Second-pass overrides that generate the code for these nodes. }
      procedure pass_generate_code_cpu;override;
      procedure second_IncludeExclude;override;
      procedure second_pi; override;
      procedure second_arctan_real; override;
      procedure second_abs_real; override;
      procedure second_round_real; override;
      procedure second_sqr_real; override;
      procedure second_sqrt_real; override;
      procedure second_ln_real; override;
      procedure second_cos_real; override;
      procedure second_sin_real; override;
      procedure second_trunc_real; override;
      procedure second_prefetch;override;
      procedure second_abs_long;override;
      procedure second_popcnt;override;
      procedure second_fma;override;
      procedure second_frac_real;override;
      procedure second_int_real;override;
      procedure second_high;override;
    private
      { Runs the second pass on lnode and brings its value into the FPU
        register that becomes this node's location. }
      procedure load_fpu_location(lnode: tnode);
    end;
  71. implementation
  72. uses
  73. systems,
  74. globtype,globals,
  75. verbose,compinnr,fmodule,
  76. defutil,
  77. aasmbase,aasmdata,aasmcpu,
  78. symconst,symtype,symdef,symcpu,
  79. ncnv,
  80. htypechk,
  81. cgbase,pass_1,pass_2,
  82. cpuinfo,cpubase,nutils,
  83. ncal,ncgutil,nld,ncon,
  84. tgobj,
  85. cga,cgutils,cgx86,cgobj,hlcgobj;
  86. {*****************************************************************************
  87. TX86INLINENODE
  88. *****************************************************************************}
  { Hook invoked by the round/trunc/frac first-pass handlers.  This
    implementation is deliberately empty: according to the original note it
    only makes a difference for x86_64. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { nothing to do here }
    end;
  { Type-checks the x86-specific intrinsics by assigning resultdef (and
    checking the parameter count where parameters are expected).  Further
    case labels are supplied by x86mmtype.inc; any other inline number is
    passed on to the inherited implementation.  Returns nil for everything
    handled here. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads: one parameter, result sized after the access width }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;

        { port writes: two parameters, no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;

        { interrupt flag control: no result }
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;

        { segment register reads }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}

        { include automatically generated code }
{$i x86mmtype.inc}
        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86-specific intrinsics: records the expected
    location of the result.  Further case labels come from x86mmfirst.inc;
    anything else is handled by the inherited implementation.  Returns nil
    for everything handled here. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics that produce a value }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;

        { intrinsics without a result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;

        { include automatically generated code }
{$i x86mmfirst.inc}
        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for Pi.  When the best real type is the 80-bit extended type
    the node is generated inline with its result in an FPU register;
    otherwise the inherited implementation decides. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited first_pi;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for ArcTan.  Generated inline (result in an FPU register)
    only when the best real type is the 80-bit extended type; all other
    cases go to the inherited implementation. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype < cpu_386 then
        begin
          result:=inherited first_arctan_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited first_arctan_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Abs on a real: always generated inline.  The result is
    expected in an MM register when the vector FPU is used for resultdef,
    otherwise in an FPU register. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for Sqr on a real: always generated inline.  The result is
    expected in an MM register when the vector FPU is used for resultdef,
    otherwise in an FPU register. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for Sqrt on a real: always generated inline.  The result is
    expected in an MM register when the vector FPU is used for resultdef,
    otherwise in an FPU register. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for Ln.  Generated inline (result in an FPU register) only
    when the best real type is the 80-bit extended type; otherwise the
    inherited implementation decides. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited first_ln_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Cos.  Generated inline (result in an FPU register) only
    when the best real type is the 80-bit extended type; all other cases go
    to the inherited implementation. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          result:=inherited first_cos_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited first_cos_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Sin.  Generated inline (result in an FPU register) only
    when the best real type is the 80-bit extended type; all other cases go
    to the inherited implementation. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          result:=inherited first_sin_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited first_sin_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Round: always generated inline.  On x86_64 with the
    vector FPU in use for the operand the result is expected in an integer
    register, otherwise in memory. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      result:=nil;
      maybe_remove_round_trunc_typeconv;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER
      else
{$endif x86_64}
        expectloc:=LOC_REFERENCE;
    end;
  { First pass for Trunc.  When optimising for size (and, on x86_64, the
    operand does not use the vector FPU) the inherited implementation is
    used; otherwise the node is generated inline with the result expected
    in an integer register (x86_64 vector FPU) or in memory. }
  function tx86inlinenode.first_trunc_real: tnode;
    var
      use_inherited : boolean;
    begin
      maybe_remove_round_trunc_typeconv;

      use_inherited:=cs_opt_size in current_settings.optimizerswitches;
{$ifdef x86_64}
      use_inherited:=use_inherited and not(use_vectorfpu(left.resultdef));
{$endif x86_64}
      if use_inherited then
        begin
          result:=inherited first_trunc_real;
          exit;
        end;

{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER
      else
{$endif x86_64}
        expectloc:=LOC_REFERENCE;
      result:=nil;
    end;
  { First pass for PopCnt.  Generated inline (result in a register) when
    the selected CPU type has the POPCNT capability - on i386 additionally
    only for operands that are not 64 bit.  Never inline on i8086.  All
    other cases go to the inherited implementation. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
        then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { First pass for FMA.  Generated inline (result in an MM register) when
    the selected CPU type has FMA or FMA4 and the result is single or
    double.  Never inline on i8086.  All other cases go to the inherited
    implementation. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
         ((is_double(resultdef)) or (is_single(resultdef))) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_fma;
    end;
  { First pass for Frac.  Generated inline (result in an MM register) when
    the FPU type is at least SSE4.1 and the result is single or double;
    otherwise the inherited implementation is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             ((is_double(resultdef)) or (is_single(resultdef)))) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for Int.  Generated inline (result in an MM register) when
    the FPU type is at least SSE4.1 and the result is single or double;
    otherwise the inherited implementation is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             ((is_double(resultdef)) or (is_single(resultdef)))) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { Simplification hook.  For Int() with an FPU type of at least SSE4.1,
    an implicit type conversion around a single/double operand is dropped:
    the conversion node is freed, the operand becomes the direct child, and
    a type-checked copy of this node (with the operand's type as resultdef)
    is returned.  Every other case is left to the inherited implementation. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      operand : tnode;
    begin
      if not(
           (current_settings.fputype>=fpu_sse41) and
           (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
           not(nf_explicit in left.flags) and
           (ttypeconvnode(left).left.resultdef.typ=floatdef) and
           ((is_double(ttypeconvnode(left).left.resultdef)) or (is_single(ttypeconvnode(left).left.resultdef)))
         ) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { get rid of the type conversion: detach its operand first so that
        freeing the conversion node does not free the operand as well }
      operand:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=operand;

      result:=self.getcopy;
      tinlinenode(result).resultdef:=operand.resultdef;
      typecheckpass(result);
    end;
  { Second-pass code generation for the x86-specific intrinsics.  Port I/O,
    CLI/STI and segment register reads are handled here, further case
    labels are supplied by x86mmsecond.inc, and every other inline number is
    passed on to the inherited implementation.

    NOTE(review): several nested helpers (GetConstInt, GetParameters,
    location_force_mmxreg, location_make_ref) and the locals paraarray, i
    and op are not referenced in the visible part of this routine; they are
    presumably used by the code in x86mmsecond.inc, so their names are kept
    as they are - confirm before renaming or removing any of them. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN of the given size into dreg and copies the value into a
      freshly allocated integer register, which becomes the node's
      location.  The port number is the node's only parameter. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode: tnode;
      begin
        portnode:=left;
        secondpass(portnode);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          begin
            { constant port number in 0..255: use the immediate form }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnode.location.value,dreg));
          end
        else
          begin
            { otherwise the port number is passed in DX }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { both forms finish the same way: move the value out of dreg into
          the result register and release dreg }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Loads the data parameter into dreg and emits an OUT of the given size
      to the port given by the other parameter. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode, datanode: tnode;
      begin
        portnode:=tcallparanode(tcallparanode(left).right).left;
        datanode:=tcallparanode(left).left;
        secondpass(datanode);
        secondpass(portnode);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,datanode.resultdef,dtype,datanode.location,dreg);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          { constant port number in 0..255: use the immediate form }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnode.location.value))
        else
          begin
            { otherwise the port number is passed in DX }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Copies the segment register segreg into a new integer register that
      becomes the node's location. }
    procedure get_segreg(segreg:tregister);
      begin
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
      end;

    { Returns the value of n when it is an integer constant node; otherwise
      reports type_e_constant_expr_expected and returns 0. }
    function GetConstInt(n: tnode): longint;
      begin
        if is_constintnode(n) then
          Result:=tordconstnode(n).value.svalue
        else
          begin
            Message(type_e_constant_expr_expected);
            Result:=0;
          end;
      end;

    { Stores the node's parameters in paraarray[1..count].  The parameter
      chain is walked from left and written from index count downwards. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        { a single parameter may be attached directly instead of through a
          call-parameter node }
        if (count=1) and
           (not (left is tcallparanode)) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        for idx := count downto 1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
          end;
      end;

    { Makes sure l lives in an MMX register.  A LOC_CMMXREGISTER location is
      accepted as it is when maybeconst is true. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        mmxreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        mmxreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=mmxreg;
      end;

    { Turns loc into a memory reference whose base is the register holding
      loc's value.  Locations that already are references stay untouched;
      anything that is not in an integer register is forced into one first. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);

        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);

        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));

        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);

{$i x86mmsecond.inc}
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Generates Pi inline: emits FLDPI, records the extra FPU stack entry and
    reports the result in the FPU result register. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Loads the value of lnode into an FPU register.

    The node's own location is set up as LOC_FPUREGISTER on the FPU result
    register, lnode goes through the second pass, and its value is then
    brought into that register depending on where it ended up.  Any
    location kind not listed below raises internal error 309991. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing is emitted for this case }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            { take over the operand's location and let the high-level code
              generator move it into an FPU register }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Generates ArcTan inline: the operand is loaded onto the FPU, followed by
    FLD1 and FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Generates Abs for reals.

    With the vector FPU the operand is ANDed with the external constant
    FPC_ABSMASK_SINGLE or FPC_ABSMASK_DOUBLE (selected by the result's float
    type; any other float type raises internal error 200506081).  With AVX
    the three-operand form writes to a new MM register, otherwise the
    operand's register is modified in place and reused as the result.
    Without the vector FPU the operand is loaded onto the x87 stack and
    FABS is emitted. }
  procedure tx86inlinenode.second_abs_real;

    { whether the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    { Registers masksym as an external symbol of the current module and
      emits the AND of the operand with the memory it refers to: avxop is
      used in the AVX three-operand form, sseop in the two-operand form. }
    procedure and_with_mask(masksym: tasmsymbol; avxop,sseop: TAsmOp);
      var
        maskref : treference;
      begin
        reference_reset_symbol(maskref,masksym,0,4,[]);
        current_module.add_extern_asmsym(masksym);
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
        if UseAVX then
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
            avxop,S_XMM,maskref,left.location.register,location.register))
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,maskref,location.register));
      end;

    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);

      if UseAVX then
        begin
          { separate destination register for the three-operand form }
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        { two-operand form works in place on the operand's register }
        location:=left.location;

      case tfloatdef(resultdef).floattype of
        s32real:
          and_with_mask(
            current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE',AT_DATA,needs_indirect),
            A_VANDPS,A_ANDPS);
        s64real:
          and_with_mask(
            current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE',AT_DATA,needs_indirect),
            A_VANDPD,A_ANDPD);
        else
          internalerror(200506081);
      end;
    end;
  { Generates Round.

    x86_64 with the vector FPU in use for the operand: the operand is forced
    into an MM register and converted into a new 64-bit integer register
    with (V)CVTSS2SI / (V)CVTSD2SI, chosen by operand size and UseAVX; any
    other operand size raises internal error 2007031402.
    Otherwise: the operand is loaded onto the x87 stack and stored with
    FISTP into a temporary that becomes the result location. }
  procedure tx86inlinenode.second_round_real;
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_NO,left.location.register,location.register));
              end;
            OS_F64:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_NO,left.location.register,location.register));
              end;
            else
              internalerror(2007031402);
          end;
        end
      else
{$endif x86_64}
        begin
          load_fpu_location(left);
          { the result lives in a temporary sized after resultdef }
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTP,S_IQ,location.reference);
          { FISTP pops the x87 stack }
          tcgx86(cg).dec_fpu_stack;
          emit_none(A_FWAIT,S_NO);
        end;
    end;
  { Generates Trunc.

    x86_64 vector-FPU path: the operand is forced into an MM register and
    converted into a new 64-bit integer register with (V)CVTTSS2SI /
    (V)CVTTSD2SI, chosen by operand size and UseAVX; any other operand size
    raises internal error 2007031401.
    x87 path with an FPU type of at least SSE3: FISTTP into a temporary.
    x87 path otherwise: the FPU control word is saved, a modified copy is
    loaded for the FISTP store, and the saved word is restored afterwards. }
  procedure tx86inlinenode.second_trunc_real;
    var
      saved_cw,trunc_cw : treference;
    begin
{$ifdef x86_64}
      { NOTE(review): left.location is inspected here before secondpass(left)
        has been called in this routine - confirm that this is intended. }
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
              end;
            OS_F64:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
              end;
            else
              internalerror(2007031401);
          end;
        end
      else
{$endif x86_64}
        begin
          if (current_settings.fputype>=fpu_sse3) then
            begin
              { FISTTP stores truncated without touching the control word }
              load_fpu_location(left);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
            end
          else
            begin
              { two 2-byte temporaries: the saved control word and the
                modified copy used for the store }
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,saved_cw);
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunc_cw);
{$ifdef i8086}
              if current_settings.cputype<=cpu_286 then
                begin
                  emit_ref(A_FSTCW,S_NO,trunc_cw);
                  emit_ref(A_FSTCW,S_NO,saved_cw);
                  emit_none(A_FWAIT,S_NO);
                end
              else
{$endif i8086}
                begin
                  emit_ref(A_FNSTCW,S_NO,trunc_cw);
                  emit_ref(A_FNSTCW,S_NO,saved_cw);
                end;
              { set bits 8..11 in the copy (the precision and rounding
                control fields of the x87 control word) }
              emit_const_ref(A_OR,S_W,$0f00,trunc_cw);
              load_fpu_location(left);
              emit_ref(A_FLDCW,S_NO,trunc_cw);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
              { back to the caller's control word }
              emit_ref(A_FLDCW,S_NO,saved_cw);
              emit_none(A_FWAIT,S_NO);
              tg.UnGetTemp(current_asmdata.CurrAsmList,saved_cw);
              tg.UnGetTemp(current_asmdata.CurrAsmList,trunc_cw);
            end;
        end;
    end;
  { Generates Sqr for reals.

    Vector FPU: the result gets a new MM register.  With AVX the operand is
    forced into an MM register and multiplied with itself into the result
    register; without AVX the operand is copied into the result register
    (after moving it out of the x87 FPU if necessary) and that register is
    multiplied with itself.
    x87: the operand is loaded onto the FPU stack and FMUL ST0,ST0 is
    emitted. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            left.location.register,left.location.register,location.register,mms_movescalar);
        end
      else
        begin
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            location.register,location.register,mms_movescalar);
        end;
    end;
  { Generates Sqrt for reals.

    Vector FPU: the operand is forced into an MM register and
    (V)SQRTSS / (V)SQRTSD - chosen by the result's float type and UseAVX -
    writes into a new MM register; any other float type raises internal
    error 200510031.
    x87: the operand is loaded onto the FPU stack and FSQRT is emitted. }
  procedure tx86inlinenode.second_sqrt_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      { In the AVX forms:
        - we use S_NO instead of S_XMM, regardless of the register size, as
          the size of the memory location is 32/64 bit
        - using left.location.register as 2nd parameter is crucial to break
          dependency chains }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
          end;
        s64real:
          begin
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
          end;
        else
          internalerror(200510031);
      end;
    end;
  procedure tx86inlinenode.second_ln_real;
    { Generates the x87 code for ln(x): after the operand has been loaded,
      FLDLN2, FXCH and FYL2X are emitted in this order. }
    const
      ln_sequence : array[0..2] of tasmop = (A_FLDLN2,A_FXCH,A_FYL2X);
    var
      step : integer;
    begin
      load_fpu_location(left);
      for step:=low(ln_sequence) to high(ln_sequence) do
        emit_none(ln_sequence[step],S_NO);
    end;
  procedure tx86inlinenode.second_cos_real;
    { Generates the code for cos(): an x87 FCOS on the loaded operand. On i8086
      targets set to a cpu type below cpu_386 the inherited implementation is
      used instead. }
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  procedure tx86inlinenode.second_sin_real;
    { Generates the code for sin(): an x87 FSIN on the loaded operand. On i8086
      targets set to a cpu type below cpu_386 the inherited implementation is
      used instead. }
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  procedure tx86inlinenode.second_prefetch;
    { Generates the code for the prefetch inline: if the operand ends up as a
      memory reference, its address is loaded into a register and a PREFETCHNTA
      on that address is emitted. For any other operand location nothing is
      emitted. On i386/i8086 the whole body is skipped for cpu types below
      cpu_Pentium3. }
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node: switch it off for the
            duration of the second pass of the operand ... }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { ... and switch it back on afterwards, so the operand subtree is left
            in the state it was found in (the switch was previously cleared a
            second time here instead of being restored) }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  procedure tx86inlinenode.second_abs_long;
    { Generates branch-free code for abs() of an integer operand.

      With CMOV: the value is copied twice, one copy is negated and the negated
      copy is conditionally moved over the result when NEG left the sign flag
      clear.

      Without CMOV (only checked on i8086/i386): mask:=value sar (bits-1);
      result:=(value xor mask)-mask. }
    var
      list : TAsmList;
      negated : tregister;
      opsize : tcgsize;
      cmov : taicpu;
    begin
      { common to both variants: evaluate the operand }
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
      list:=current_asmdata.CurrAsmList;
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(list,opsize);
          { result register gets the value, the operand register is turned into the sign mask }
          cg.a_load_reg_reg(list,opsize,opsize,left.location.register,location.register);
          cg.a_op_const_reg(list,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          cg.a_op_reg_reg(list,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(list,OP_SUB,opsize,left.location.register,location.register);
        end
      else
{$endif i8086 or i386}
        begin
          hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,true);
          negated:=cg.getintregister(list,opsize);
          location:=left.location;
          location.register:=cg.getintregister(list,opsize);
          cg.a_load_reg_reg(list,opsize,opsize,left.location.register,negated);
          cg.a_load_reg_reg(list,opsize,opsize,left.location.register,location.register);
          emit_reg(A_NEG,tcgsize2opsize[opsize],negated);
          { take the negated copy only if it is not negative }
          cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negated,location.register);
          cmov.condition:=C_NS;
          list.concat(cmov);
        end;
    end;
  906. {*****************************************************************************
  907. INCLUDE/EXCLUDE GENERIC HANDLING
  908. *****************************************************************************}
  procedure tx86inlinenode.second_IncludeExclude;
    { Generates the code for include(set,element) and exclude(set,element).

      A constant element is folded into an OR (include) or AND-with-inverted-mask
      (exclude) on the word of the set that holds the bit. A run-time element is
      handled with BTS (include) or BTR (exclude). }
    var
      setnode,
      elemnode : tnode;
      list : TAsmList;
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,
      mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      if not is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end
      else
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size);
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      secondpass(setnode);
      secondpass(elemnode);
      list:=current_asmdata.CurrAsmList;
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { bit mask of the element within its word }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { include sets the bit, exclude clears it }
          if inlinenumber<>in_include_x_y then
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end
          else
            cgop:=OP_OR;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { move the reference to the word containing the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(list,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(list,cgop,setnode.location.size,mask,setnode.location.register);
            else
              internalerror(200405022);
          end;
        end
      else
        begin
          { the bit instructions are not emitted with 8 bit operands: widen to 32 bit }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(list,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(list,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;
          if setnode.location.loc=LOC_REFERENCE then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if orgsize in [OS_8,OS_S8] then
                setreg:=cg.makeregsize(list,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
            end;
        end;
    end;
  procedure tx86inlinenode.second_popcnt;
    { Generates a POPCNT for the operand. 8 bit operands are counted with a
      16 bit POPCNT; if the result type is one byte wide, the result register
      is resized to 8 bit afterwards. }
    var
      cntsize : tcgsize;
    begin
      secondpass(left);

      { no 8 Bit popcont }
      cntsize:=tcgsize2unsigned[left.location.size];
      if cntsize=OS_8 then
        cntsize:=OS_16;

      { the operand has to be a register or a reference of exactly the operation size }
      if (left.location.size<>cntsize) or
         not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);

      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register);
      end;

      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  procedure tx86inlinenode.second_fma;
    { Generates a scalar FMA3 instruction for the fma inline.

      Unary minus nodes directly on the parameters are not evaluated; instead
      they select the negated-product and/or subtracting instruction variant.
      At most one of the three operands is used as memory operand, the others
      are forced into mm registers. }
    const
      { indices: [product negated, third operand negated, float type, form];
        form 0 is used when all operands are registers and the third operand is
        copied to the destination, form 1..3 is the number of the memory operand
        (3 is also used when the first or second operand is copied to the destination) }
      fma_op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
         { positive product }
         (
          { positive third operand }
          ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
           (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
          ),
          { negative third operand }
          ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
           (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
          )
         ),
         { negative product }
         (
          { positive third operand }
          ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
           (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
          ),
          { negative third operand }
          ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
           (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
          )
         )
        );
    var
      paraarray : array[1..3] of tnode;
      list : TAsmList;
      memop,
      accidx,
      srcidx,
      regidx,
      opform,
      i : integer;
      negop3,
      negproduct,
      gotmem : boolean;
    begin
{$ifndef i8086}
      if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;
          { `parameters` is stored into slot 3, the nodes reached through nextpara into slots 2 and 1 }
          paraarray[3]:=tcallparanode(parameters).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the unused unary minus nodes are not released here, they are kept and released
            together with the other nodes, only no code is generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  negop3:=true
                else
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);
          list:=current_asmdata.CurrAsmList;

          { only one memory operand is allowed: the first reference found is kept,
            everything else that is not yet in an mm register is loaded into one }
          gotmem:=false;
          memop:=0;
          for i:=1 to 3 do
            begin
              if paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
                continue;
              if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                begin
                  memop:=i;
                  gotmem:=true;
                end
              else
                hlcg.location_force_mmregscalar(list,paraarray[i].location,paraarray[i].resultdef,true);
            end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(list,location.size);

          if gotmem then
            begin
              { accidx: operand copied into the destination register first,
                regidx: operand passed as plain register }
              case memop of
                1:
                  begin
                    accidx:=3;
                    regidx:=2;
                  end;
                2:
                  begin
                    accidx:=3;
                    regidx:=1;
                  end;
                3:
                  begin
                    accidx:=1;
                    regidx:=2;
                  end;
                else
                  internalerror(2014041301);
              end;
              hlcg.a_loadmm_reg_reg(list,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_ref_reg_reg(fma_op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                paraarray[memop].location.reference,paraarray[regidx].location.register,location.register);
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=1;
                  srcidx:=3;
                  regidx:=2;
                  opform:=3;
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=2;
                  srcidx:=3;
                  regidx:=1;
                  opform:=3;
                end
              else
                begin
                  accidx:=3;
                  srcidx:=1;
                  regidx:=2;
                  opform:=0;
                end;
              hlcg.a_loadmm_reg_reg(list,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_reg_reg_reg(fma_op[negproduct,negop3,tfloatdef(resultdef).floattype,opform],S_NO,
                paraarray[srcidx].location.register,paraarray[regidx].location.register,location.register);
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  procedure tx86inlinenode.second_frac_real;
    { Generates the code for frac() on the vector FPU: the operand is rounded
      with ROUNDSS/ROUNDSD (immediate 3) and the rounded value is subtracted
      from the operand. Only the vector FPU is supported here, anything else
      is an internal error. }
    var
      list : TAsmList;
      roundedreg : TRegister;
      roundop,
      subop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101)
      else
        begin
          secondpass(left);
          list:=current_asmdata.CurrAsmList;
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(list,location.size);
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_VROUNDSS;
                    subop:=A_VSUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_VROUNDSD;
                    subop:=A_VSUBSD;
                  end;
                else
                  internalerror(2017052102);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              list.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
              list.concat(taicpu.op_reg_reg_reg(subop,S_NO,location.register,left.location.register,location.register));
            end
          else
            begin
              { the two operand forms overwrite their destination: keep the operand
                in the result register and round into a scratch register }
              roundedreg:=cg.getmmregister(list,location.size);
              cg.a_loadmm_loc_reg(list,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_ROUNDSS;
                    subop:=A_SUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_ROUNDSD;
                    subop:=A_SUBSD;
                  end;
                else
                  internalerror(2017052103);
              end;
              list.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,roundedreg));
              list.concat(taicpu.op_reg_reg(subop,S_NO,roundedreg,location.register));
            end;
        end;
    end;
  procedure tx86inlinenode.second_int_real;
    { Generates the code for int() on the vector FPU: a single
      ROUNDSS/ROUNDSD (or the AVX form) with immediate 3 from the operand
      register into the result register. Only the vector FPU is supported
      here, anything else is an internal error. }
    var
      list : TAsmList;
      roundop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107)
      else
        begin
          secondpass(left);
          list:=current_asmdata.CurrAsmList;
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(list,location.size);
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_VROUNDSS;
                s64real:
                  roundop:=A_VROUNDSD;
                else
                  internalerror(2017052105);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              list.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_ROUNDSS;
                s64real:
                  roundop:=A_ROUNDSD;
                else
                  internalerror(2017052106);
              end;
              list.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,location.register));
            end;
        end;
    end;
  procedure tx86inlinenode.second_high;
    { Generates the code for high() of a dynamic array without a separate nil
      test: the array pointer is decremented by one, which yields -1 and a set
      carry flag for a nil array; otherwise the value is loaded from the array
      header through the decremented pointer. Any other operand type is an
      internal error. }
    var
      list : TAsmList;
      skiplab : tasmlabel;
      highref : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122801);
      list:=current_asmdata.CurrAsmList;
      hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(skiplab);
      { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
        flag is set in this case, so we can jump depending on it

        do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
        we are using is not working as dec does not touch the carry flag }
      list.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(list,F_C,skiplab);
      { the value is read at offset -ossinttype.size from the array pointer; the +1
        compensates for the decrement above. No volatility is set on the reference:
        volatility of the dyn. array refers to the pointer, not to the array data }
      hlcg.reference_reset_base(highref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
      { the result reuses the register that held the array pointer, so the -1
        produced for a nil array is already in place when the load is skipped }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.makeregsize(list,left.location.register,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(list,ossinttype,resultdef,highref,location.register);
      cg.a_label(list,skiplab);
    end;
  1283. end.