nx86inl.pas 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { Inline node for x86 targets. It derives from tcginlinenode and overrides
    the type check, first pass and second pass hooks listed below. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { called from the round/trunc/frac first pass handlers; the
      implementation in this class is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    function pass_typecheck_cpu: tnode; override;

    { first pass overrides, so that the code generator will actually
      generate these nodes }
    function first_cpu: tnode; override;
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real: tnode; override;
    function first_int_real: tnode; override;
    function first_minmax: tnode; override;
    function simplify(forinline: boolean): tnode; override;

    { second pass overrides, these generate the code for the nodes }
    procedure pass_generate_code_cpu; override;
    procedure second_IncludeExclude; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_popcnt; override;
    procedure second_fma; override;
    procedure second_frac_real; override;
    procedure second_int_real; override;
    procedure second_high; override;
    procedure second_minmax; override;
  private
    { evaluates lnode and brings its value into the node's FPU result
      location }
    procedure load_fpu_location(lnode: tnode);
  end;
  73. implementation
  74. uses
  75. systems,
  76. globtype,globals,
  77. verbose,compinnr,fmodule,
  78. defutil,
  79. aasmbase,aasmdata,aasmcpu,
  80. symconst,symtype,symdef,symcpu,
  81. ncnv,
  82. htypechk,
  83. cgbase,pass_1,pass_2,
  84. cpuinfo,cpubase,nutils,
  85. ncal,ncgutil,nld,ncon,
  86. tgobj,
  87. cga,cgutils,cgx86,cgobj,hlcgobj;
  88. {*****************************************************************************
  89. TX86INLINENODE
  90. *****************************************************************************}
  { Virtual hook invoked by the round/trunc/frac first pass handlers.
    Deliberately a no-op in this class. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
  begin
    { nothing to do here: only makes a difference for x86_64 }
  end;
  { Type check of the x86 specific intrinsics: assigns resultdef for every
    intrinsic handled here and hands all other inline numbers to the
    inherited implementation. Returns nil for the locally handled ones. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
  begin
    Result:=nil;
    case inlinenumber of
      { interrupt flag intrinsics: no result }
      in_x86_cli,
      in_x86_sti:
        resultdef:=voidtype;

      { port output: checked with CheckParameters(2), no result }
      in_x86_outportb,
      in_x86_outportw,
      in_x86_outportl:
        begin
          CheckParameters(2);
          resultdef:=voidtype;
        end;

      { port input: checked with CheckParameters(1), result type per width }
      in_x86_inportb:
        begin
          CheckParameters(1);
          resultdef:=u8inttype;
        end;
      in_x86_inportw:
        begin
          CheckParameters(1);
          resultdef:=u16inttype;
        end;
      in_x86_inportl:
        begin
          CheckParameters(1);
          resultdef:=s32inttype;
        end;

      { segment register reads }
      in_x86_get_cs,
      in_x86_get_ss,
      in_x86_get_ds,
      in_x86_get_es,
      in_x86_get_fs,
      in_x86_get_gs:
  {$ifdef i8086}
        resultdef:=u16inttype;
  {$else i8086}
        resultdef:=s32inttype;
  {$endif i8086}

      { include automatically generated code }
  {$i x86mmtype.inc}
      else
        Result:=inherited pass_typecheck_cpu;
    end;
  end;
  { First pass of the x86 specific intrinsics: records the expected result
    location. Inline numbers not listed here go to the inherited method. }
  function tx86inlinenode.first_cpu: tnode;
  begin
    Result:=nil;
    case inlinenumber of
      { intrinsics without a result }
      in_x86_cli,
      in_x86_sti,
      in_x86_outportb,
      in_x86_outportw,
      in_x86_outportl:
        expectloc:=LOC_VOID;

      { intrinsics whose result ends up in an integer register }
      in_x86_get_cs,
      in_x86_get_ss,
      in_x86_get_ds,
      in_x86_get_es,
      in_x86_get_fs,
      in_x86_get_gs,
      in_x86_inportb,
      in_x86_inportw,
      in_x86_inportl:
        expectloc:=LOC_REGISTER;

      { include automatically generated code }
  {$i x86mmfirst.inc}
      else
        Result:=inherited first_cpu;
    end;
  end;
  { First pass for pi: handled inline (FPU register result) only when the
    best real type is the 80 bit extended type, otherwise inherited. }
  function tx86inlinenode.first_pi : tnode;
  begin
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited first_pi;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for arctan: handled inline (FPU register result) when the
    best real type is s80real, otherwise the inherited method decides. }
  function tx86inlinenode.first_arctan_real : tnode;
  begin
  {$ifdef i8086}
    { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
      so we need to use the RTL helper on these FPUs }
    if current_settings.cputype < cpu_386 then
      begin
        Result:=inherited first_arctan_real;
        exit;
      end;
  {$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited first_arctan_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for abs on reals: always handled inline; the result location
    is an MM register when use_vectorfpu(resultdef) holds, else an FPU
    register. }
  function tx86inlinenode.first_abs_real : tnode;
  begin
    Result:=nil;
    if not use_vectorfpu(resultdef) then
      expectloc:=LOC_FPUREGISTER
    else
      expectloc:=LOC_MMREGISTER;
  end;
  { First pass for sqr on reals: always handled inline; the result location
    is an MM register when use_vectorfpu(resultdef) holds, else an FPU
    register. }
  function tx86inlinenode.first_sqr_real : tnode;
  begin
    Result:=nil;
    if not use_vectorfpu(resultdef) then
      expectloc:=LOC_FPUREGISTER
    else
      expectloc:=LOC_MMREGISTER;
  end;
  { First pass for sqrt on reals: always handled inline; the result location
    is an MM register when use_vectorfpu(resultdef) holds, else an FPU
    register. }
  function tx86inlinenode.first_sqrt_real : tnode;
  begin
    Result:=nil;
    if not use_vectorfpu(resultdef) then
      expectloc:=LOC_FPUREGISTER
    else
      expectloc:=LOC_MMREGISTER;
  end;
  { First pass for ln: handled inline (FPU register result) only when the
    best real type is s80real, otherwise inherited. }
  function tx86inlinenode.first_ln_real : tnode;
  begin
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited first_ln_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for cos: handled inline (FPU register result) when the best
    real type is s80real, otherwise the inherited method decides. }
  function tx86inlinenode.first_cos_real : tnode;
  begin
  {$ifdef i8086}
    { FCOS is 387+ }
    if current_settings.cputype < cpu_386 then
      begin
        Result:=inherited first_cos_real;
        exit;
      end;
  {$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited first_cos_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for sin: handled inline (FPU register result) when the best
    real type is s80real, otherwise the inherited method decides. }
  function tx86inlinenode.first_sin_real : tnode;
  begin
  {$ifdef i8086}
    { FSIN is 387+ }
    if current_settings.cputype < cpu_386 then
      begin
        Result:=inherited first_sin_real;
        exit;
      end;
  {$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        Result:=inherited first_sin_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    Result:=nil;
  end;
  { First pass for round: always handled inline. The result is expected in
    memory (LOC_REFERENCE), except on x86_64 with a vector FPU operand,
    where it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
  begin
    maybe_remove_round_trunc_typeconv;
    Result:=nil;
  {$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      begin
        expectloc:=LOC_REGISTER;
        exit;
      end;
  {$endif x86_64}
    expectloc:=LOC_REFERENCE;
  end;
  { First pass for trunc. On x86_64 a vector FPU operand is always handled
    inline with a register result. Otherwise the inherited method is used
    when optimizing for size, and the inline variant with a memory result
    in all remaining cases. }
  function tx86inlinenode.first_trunc_real: tnode;
  begin
    maybe_remove_round_trunc_typeconv;
  {$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      begin
        expectloc:=LOC_REGISTER;
        Result:=nil;
        exit;
      end;
  {$endif x86_64}
    if cs_opt_size in current_settings.optimizerswitches then
      Result:=inherited first_trunc_real
    else
      begin
        expectloc:=LOC_REFERENCE;
        Result:=nil;
      end;
  end;
  { First pass for popcnt: handled inline (register result) when the
    selected CPU type has the POPCNT capability and, on i386, the operand
    is not 64 bit; otherwise inherited. Never inline on i8086. }
  function tx86inlinenode.first_popcnt: tnode;
  begin
    Result:=nil;
  {$ifndef i8086}
    if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
  {$ifdef i386}
       and not is_64bit(left.resultdef)
  {$endif i386}
    then
      begin
        expectloc:=LOC_REGISTER;
        exit;
      end;
  {$endif not i8086}
    Result:=inherited first_popcnt;
  end;
  { First pass for fma: handled inline (MM register result) when the
    selected CPU type offers FMA or FMA4 and the result is single or
    double; otherwise inherited. Never inline on i8086. }
  function tx86inlinenode.first_fma : tnode;
  begin
  {$ifndef i8086}
    if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
       (is_double(resultdef) or is_single(resultdef)) then
      begin
        expectloc:=LOC_MMREGISTER;
        Result:=nil;
        exit;
      end;
  {$endif not i8086}
    Result:=inherited first_fma;
  end;
  { First pass for frac: handled inline (MM register result) when the FPU
    type is at least SSE4.1 and the result is single or double; otherwise
    inherited. }
  function tx86inlinenode.first_frac_real : tnode;
  begin
    if not((current_settings.fputype>=fpu_sse41) and
           (is_double(resultdef) or is_single(resultdef))) then
      begin
        Result:=inherited first_frac_real;
        exit;
      end;
    maybe_remove_round_trunc_typeconv;
    expectloc:=LOC_MMREGISTER;
    Result:=nil;
  end;
  { First pass for int: handled inline (MM register result) when the FPU
    type is at least SSE4.1 and the result is single or double; otherwise
    inherited. }
  function tx86inlinenode.first_int_real : tnode;
  begin
    if not((current_settings.fputype>=fpu_sse41) and
           (is_double(resultdef) or is_single(resultdef))) then
      begin
        Result:=inherited first_int_real;
        exit;
      end;
    Result:=nil;
    expectloc:=LOC_MMREGISTER;
  end;
  { First pass for min/max: handled inline (MM register result) for single
    and double results. On i386 this additionally requires an FPU type of
    at least SSE (single) or SSE2 (double). Never inline on i8086;
    everything else is inherited. }
  function tx86inlinenode.first_minmax: tnode;
  begin
  {$ifndef i8086}
    if
  {$ifdef i386}
       ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
       ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
  {$else i386}
       (is_double(resultdef) or is_single(resultdef))
  {$endif i386}
    then
      begin
        expectloc:=LOC_MMREGISTER;
        Result:=nil;
        exit;
      end;
  {$endif not i8086}
    Result:=inherited first_minmax;
  end;
  { Simplification for int(): with an FPU type of at least SSE4.1, an
    implicit type conversion of a single/double operand underneath an
    in_int_real node is dropped and a re-typechecked copy of the node,
    typed like the unconverted operand, is returned. All other nodes are
    left to the inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
  var
    operand : tnode;
  begin
    if not((current_settings.fputype>=fpu_sse41) and
           (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
           not(nf_explicit in left.flags) and
           (ttypeconvnode(left).left.resultdef.typ=floatdef) and
           (is_double(ttypeconvnode(left).left.resultdef) or
            is_single(ttypeconvnode(left).left.resultdef))) then
      begin
        Result:=inherited simplify(forinline);
        exit;
      end;

    { get rid of the type conversion: detach its operand before freeing it }
    operand:=ttypeconvnode(left).left;
    ttypeconvnode(left).left:=nil;
    left.free;
    left:=operand;

    Result:=self.getcopy;
    tinlinenode(Result).resultdef:=operand.resultdef;
    typecheckpass(Result);
  end;
  { Second pass for the x86 specific intrinsics (port I/O, CLI/STI, segment
    register reads) plus whatever the included x86mmsecond.inc adds to the
    case statement. Anything else goes to the inherited implementation.

    NOTE(review): paraarray, i, op and the helpers GetConstInt,
    GetParameters, location_force_mmxreg and location_make_ref are not used
    by the code visible here; presumably the generated include relies on
    them, so their names are left untouched - confirm against
    x86mmsecond.inc. }
  procedure tx86inlinenode.pass_generate_code_cpu;
  var
    paraarray : array[1..4] of tnode;
    i : integer;
    op: TAsmOp;

    { Emits an IN into dreg and copies dreg into a fresh integer register
      that becomes the node's location. A constant port number in 0..255
      uses the immediate form, everything else is loaded into DX first. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
    var
      port: tnode;
    begin
      port:=left;
      secondpass(port);
      if (port.location.loc=LOC_CONSTANT) and
         (port.location.value>=0) and
         (port.location.value<=255) then
        begin
          hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,port.location.value,dreg));
        end
      else
        begin
          hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
          hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
          hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
        end;
      { shared by both forms: move the value out of the fixed register }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
    end;

    { Loads the data parameter into dreg and emits an OUT. A constant port
      number in 0..255 uses the immediate form, everything else goes
      through DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
    var
      port, data: tnode;
    begin
      port:=tcallparanode(tcallparanode(left).right).left;
      data:=tcallparanode(left).left;
      secondpass(data);
      secondpass(port);
      hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,data.resultdef,dtype,data.location,dreg);
      if not((port.location.loc=LOC_CONSTANT) and
             (port.location.value>=0) and
             (port.location.value<=255)) then
        begin
          hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
          hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
        end
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,port.location.value));
      hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
    end;

    { Moves segreg into a fresh integer register, which becomes the node's
      location. }
    procedure get_segreg(segreg:tregister);
    begin
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
    end;

    { Returns the value of an integer constant node; for any other node a
      "constant expression expected" message is issued and 0 is returned. }
    function GetConstInt(n: tnode): longint;
    begin
      if not is_constintnode(n) then
        begin
          Result:=0;
          Message(type_e_constant_expr_expected);
          exit;
        end;
      Result:=tordconstnode(n).value.svalue;
    end;

    { Fills paraarray[1..count] from the parameter chain hanging off left.
      The chain is walked from its head while the array is filled from
      index count downwards. A single parameter that is not wrapped in a
      tcallparanode is stored directly. }
    procedure GetParameters(count: longint);
    var
      idx: longint;
      cur: tnode;
    begin
      if (count=1) and
         (not (left is tcallparanode)) then
        begin
          paraarray[1]:=left;
          exit;
        end;
      cur:=left;
      idx:=count;
      while idx>=1 do
        begin
          paraarray[idx]:=tcallparanode(cur).paravalue;
          cur:=tcallparanode(cur).nextpara;
          dec(idx);
        end;
    end;

    { Makes sure l lives in an MMX register. Nothing is done if it already
      is LOC_MMXREGISTER, or LOC_CMMXREGISTER while maybeconst is set. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
    var
      mmxreg : tregister;
    begin
      if (l.loc=LOC_MMXREGISTER) or
         ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
        exit;
      mmxreg:=tcgx86(cg).getmmxregister(list);
      cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
      location_freetemp(list,l);
      location_reset(l,LOC_MMXREGISTER,OS_M64);
      l.register:=mmxreg;
    end;

    { Turns loc into a reference. Reference locations are left alone; any
      other location is first forced into a register (unless it already is
      one) and that register becomes the base of a new OS_32 reference. }
    procedure location_make_ref(var loc: tlocation);
    var
      refloc: tlocation;
    begin
      if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
        exit;
      if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
      location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
      refloc.reference.base:=loc.register;
      loc:=refloc;
    end;

  begin
    FillChar(paraarray,sizeof(paraarray),0);
    case inlinenumber of
      { port input }
      in_x86_inportb:
        inport(NR_AL,S_B,u8inttype);
      in_x86_inportw:
        inport(NR_AX,S_W,u16inttype);
      in_x86_inportl:
        inport(NR_EAX,S_L,s32inttype);

      { port output }
      in_x86_outportb:
        outport(NR_AL,S_B,u8inttype);
      in_x86_outportw:
        outport(NR_AX,S_W,u16inttype);
      in_x86_outportl:
        outport(NR_EAX,S_L,s32inttype);

      { interrupt flag }
      in_x86_cli:
        current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
      in_x86_sti:
        current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));

      { segment registers }
      in_x86_get_cs:
        get_segreg(NR_CS);
      in_x86_get_ss:
        get_segreg(NR_SS);
      in_x86_get_ds:
        get_segreg(NR_DS);
      in_x86_get_es:
        get_segreg(NR_ES);
      in_x86_get_fs:
        get_segreg(NR_FS);
      in_x86_get_gs:
        get_segreg(NR_GS);

  {$i x86mmsecond.inc}
      else
        inherited pass_generate_code_cpu;
    end;
  end;
  { Second pass for pi: emits FLDPI, accounts for the additional FPU stack
    entry and reports the FPU result register as the node's location. }
  procedure tx86inlinenode.second_pi;
  begin
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_FPU_RESULT_REG;
    emit_none(A_FLDPI,S_NO);
    tcgx86(cg).inc_fpu_stack;
  end;
  { Evaluates lnode and loads its value into an FPU register. The node's
    location is reset to LOC_FPUREGISTER / NR_FPU_RESULT_REG before lnode
    is processed. A value that is already in LOC_FPUREGISTER needs no
    extra code; locations not handled below raise an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
  begin
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_FPU_RESULT_REG;
    secondpass(lnode);
    case lnode.location.loc of
      LOC_FPUREGISTER:
        { nothing to emit }
        ;
      LOC_REFERENCE,
      LOC_CREFERENCE:
        cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.reference,location.register);
      LOC_CFPUREGISTER:
        cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.register,location.register);
      LOC_MMREGISTER,
      LOC_CMMREGISTER:
        begin
          { take over the MM location and let hlcg force it into an FPU
            register }
          location:=lnode.location;
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
        end;
      else
        internalerror(309991);
    end;
  end;
  { Second pass for arctan: loads the operand into the FPU, then emits
    FLD1 followed by FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
  begin
    load_fpu_location(left);
    { FLD1 supplies the second FPATAN operand }
    emit_none(A_FLD1,S_NO);
    emit_none(A_FPATAN,S_NO);
  end;
  { Second pass for abs on reals. With a vector FPU the operand is ANDed
    with the FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE constant (three
    operand VANDPS/VANDPD form when UseAVX, ANDPS/ANDPD in place
    otherwise); without one FABS is emitted. }
  procedure tx86inlinenode.second_abs_real;

    { value passed as last argument to RefAsmSymbol for the mask symbol }
    function needs_indirect:boolean; inline;
    begin
      result:=(tf_supports_packages in target_info.flags) and
              (target_info.system in systems_indirect_var_imports);
    end;

  var
    href : treference;
    sym : tasmsymbol;
    maskop : TAsmOp;
  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_none(A_FABS,S_NO);
        exit;
      end;

    secondpass(left);
    if left.location.loc<>LOC_MMREGISTER then
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
    if UseAVX then
      begin
        { three operand form: the result gets a register of its own }
        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
      end
    else
      { two operand form: the operand register is reused for the result }
      location:=left.location;

    { pick mask symbol and opcode for the float type }
    sym:=nil;
    maskop:=A_NONE;
    case tfloatdef(resultdef).floattype of
      s32real:
        begin
          sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE',AT_DATA,needs_indirect);
          if UseAVX then
            maskop:=A_VANDPS
          else
            maskop:=A_ANDPS;
        end;
      s64real:
        begin
          sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE',AT_DATA,needs_indirect);
          if UseAVX then
            maskop:=A_VANDPD
          else
            maskop:=A_ANDPD;
        end;
      else
        internalerror(200506081);
    end;

    reference_reset_symbol(href,sym,0,4,[]);
    current_module.add_extern_asmsym(sym);
    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
    if UseAVX then
      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
        maskop,S_XMM,href,left.location.register,location.register))
    else
      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(maskop,S_XMM,href,location.register));
  end;
  { Second pass for round. On x86_64 with a vector FPU operand a
    (V)CVTSS2SI / (V)CVTSD2SI into a 64 bit integer register is emitted.
    Otherwise the operand goes through the FPU and is stored to a temp
    with FISTP. }
  procedure tx86inlinenode.second_round_real;
  {$ifdef x86_64}
  var
    cvtop : TAsmOp;
  {$endif x86_64}
  begin
  {$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      begin
        secondpass(left);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location_reset(location,LOC_REGISTER,OS_S64);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
        { select the conversion opcode, then emit it once }
        cvtop:=A_NONE;
        if UseAVX then
          case left.location.size of
            OS_F32:
              cvtop:=A_VCVTSS2SI;
            OS_F64:
              cvtop:=A_VCVTSD2SI;
            else
              internalerror(2007031402);
          end
        else
          case left.location.size of
            OS_F32:
              cvtop:=A_CVTSS2SI;
            OS_F64:
              cvtop:=A_CVTSD2SI;
            else
              internalerror(2007031404);
          end;
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
        exit;
      end;
  {$endif x86_64}
    { x87 path: store the value as a 64 bit integer into a temp }
    load_fpu_location(left);
    location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
    tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
    emit_ref(A_FISTP,S_IQ,location.reference);
    tcgx86(cg).dec_fpu_stack;
    emit_none(A_FWAIT,S_NO);
  end;
  { Second pass for trunc. Three code paths:
      - x86_64 with a vector FPU operand: (V)CVTTSS2SI / (V)CVTTSD2SI into
        a 64 bit integer register;
      - FPU type of at least SSE3: FISTTP into a temp;
      - otherwise: the FPU control word is saved, $0f00 is ORed into a
        copy that is loaded around the FISTP, and the saved control word
        is restored afterwards. }
  procedure tx86inlinenode.second_trunc_real;
  var
    oldcw,newcw : treference;
  {$ifdef x86_64}
    cvtop : TAsmOp;
  {$endif x86_64}
  begin
  {$ifdef x86_64}
    { NOTE(review): left.location.loc is inspected here before
      secondpass(left) has been called in this routine - confirm that this
      is intended }
    if use_vectorfpu(left.resultdef) and
       not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
      begin
        secondpass(left);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location_reset(location,LOC_REGISTER,OS_S64);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
        { select the truncating conversion opcode, then emit it once }
        cvtop:=A_NONE;
        if UseAVX then
          case left.location.size of
            OS_F32:
              cvtop:=A_VCVTTSS2SI;
            OS_F64:
              cvtop:=A_VCVTTSD2SI;
            else
              internalerror(2007031401);
          end
        else
          case left.location.size of
            OS_F32:
              cvtop:=A_CVTTSS2SI;
            OS_F64:
              cvtop:=A_CVTTSD2SI;
            else
              internalerror(2007031403);
          end;
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
        exit;
      end;
  {$endif x86_64}

    if (current_settings.fputype>=fpu_sse3) then
      begin
        { FISTTP path: no control word handling is emitted }
        load_fpu_location(left);
        location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
        tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
        emit_ref(A_FISTTP,S_IQ,location.reference);
        tcgx86(cg).dec_fpu_stack;
        exit;
      end;

    { control word path: two 2 byte temps hold the saved and the modified
      control word }
    tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
    tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
  {$ifdef i8086}
    if current_settings.cputype<=cpu_286 then
      begin
        emit_ref(A_FSTCW,S_NO,newcw);
        emit_ref(A_FSTCW,S_NO,oldcw);
        emit_none(A_FWAIT,S_NO);
      end
    else
  {$endif i8086}
      begin
        emit_ref(A_FNSTCW,S_NO,newcw);
        emit_ref(A_FNSTCW,S_NO,oldcw);
      end;
    emit_const_ref(A_OR,S_W,$0f00,newcw);
    load_fpu_location(left);
    emit_ref(A_FLDCW,S_NO,newcw);
    location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
    tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
    emit_ref(A_FISTP,S_IQ,location.reference);
    tcgx86(cg).dec_fpu_stack;
    { restore the saved control word }
    emit_ref(A_FLDCW,S_NO,oldcw);
    emit_none(A_FWAIT,S_NO);
    tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
    tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
  end;
  { Second pass for sqr on reals: multiplies the operand with itself,
    either with a scalar MM multiply into a fresh MM register or, without
    a vector FPU, with FMUL ST0,ST0. }
  procedure tx86inlinenode.second_sqr_real;
  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
        exit;
      end;

    secondpass(left);
    location_reset(location,LOC_MMREGISTER,left.location.size);
    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
    if not UseAVX then
      begin
        { two operand form: copy the operand into the result register and
          square it in place; only an operand held in an FPU register is
          forced into an MM register first }
        if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
        cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
          location.register,location.register,mms_movescalar);
      end
    else
      begin
        { three operand form: operand * operand -> result register }
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
          left.location.register,left.location.register,location.register,mms_movescalar);
      end;
  end;
  790. procedure tx86inlinenode.second_sqrt_real;
  791. begin
  792. if use_vectorfpu(resultdef) then
  793. begin
  794. secondpass(left);
  795. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  796. location_reset(location,LOC_MMREGISTER,left.location.size);
  797. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  798. if UseAVX then
  799. case tfloatdef(resultdef).floattype of
  800. s32real:
  801. { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
  802. { using left.location.register here as 2nd parameter is crucial to break dependency chains }
  803. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register));
  804. s64real:
  805. { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
  806. { using left.location.register here as 2nd parameter is crucial to break dependency chains }
  807. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register));
  808. else
  809. internalerror(200510031);
  810. end
  811. else
  812. case tfloatdef(resultdef).floattype of
  813. s32real:
  814. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
  815. s64real:
  816. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
  817. else
  818. internalerror(2005100303);
  819. end;
  820. end
  821. else
  822. begin
  823. load_fpu_location(left);
  824. emit_none(A_FSQRT,S_NO);
  825. end;
  826. end;
  827. procedure tx86inlinenode.second_ln_real;
  828. begin
  829. load_fpu_location(left);
  830. emit_none(A_FLDLN2,S_NO);
  831. emit_none(A_FXCH,S_NO);
  832. emit_none(A_FYL2X,S_NO);
  833. end;
  834. procedure tx86inlinenode.second_cos_real;
  835. begin
  836. {$ifdef i8086}
  837. { FCOS is 387+ }
  838. if current_settings.cputype < cpu_386 then
  839. begin
  840. inherited;
  841. exit;
  842. end;
  843. {$endif i8086}
  844. load_fpu_location(left);
  845. emit_none(A_FCOS,S_NO);
  846. end;
  847. procedure tx86inlinenode.second_sin_real;
  848. begin
  849. {$ifdef i8086}
  850. { FSIN is 387+ }
  851. if current_settings.cputype < cpu_386 then
  852. begin
  853. inherited;
  854. exit;
  855. end;
  856. {$endif i8086}
  857. load_fpu_location(left);
  858. emit_none(A_FSIN,S_NO)
  859. end;
  860. procedure tx86inlinenode.second_prefetch;
  861. var
  862. ref : treference;
  863. r : tregister;
  864. checkpointer_used : boolean;
  865. begin
  866. {$if defined(i386) or defined(i8086)}
  867. if current_settings.cputype>=cpu_Pentium3 then
  868. {$endif i386 or i8086}
  869. begin
  870. { do not call Checkpointer for left node }
  871. checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
  872. if checkpointer_used then
  873. node_change_local_switch(left,cs_checkpointer,false);
  874. secondpass(left);
  875. if checkpointer_used then
  876. node_change_local_switch(left,cs_checkpointer,false);
  877. case left.location.loc of
  878. LOC_CREFERENCE,
  879. LOC_REFERENCE:
  880. begin
  881. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
  882. cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
  883. reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
  884. current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
  885. end;
  886. else
  887. { nothing to prefetch };
  888. end;
  889. end;
  890. end;
  891. procedure tx86inlinenode.second_abs_long;
  892. var
  893. hregister : tregister;
  894. opsize : tcgsize;
  895. hp : taicpu;
  896. begin
  897. {$if defined(i8086) or defined(i386)}
  898. if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
  899. begin
  900. opsize:=def_cgsize(left.resultdef);
  901. secondpass(left);
  902. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
  903. location:=left.location;
  904. location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  905. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
  906. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
  907. cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
  908. cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
  909. end
  910. else
  911. {$endif i8086 or i386}
  912. begin
  913. opsize:=def_cgsize(left.resultdef);
  914. secondpass(left);
  915. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
  916. hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  917. location:=left.location;
  918. location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  919. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,hregister);
  920. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
  921. emit_reg(A_NEG,tcgsize2opsize[opsize],hregister);
  922. hp:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],hregister,location.register);
  923. hp.condition:=C_NS;
  924. current_asmdata.CurrAsmList.concat(hp);
  925. end;
  926. end;
  927. {*****************************************************************************
  928. INCLUDE/EXCLUDE GENERIC HANDLING
  929. *****************************************************************************}
  930. procedure tx86inlinenode.second_IncludeExclude;
  931. var
  932. hregister,
  933. hregister2: tregister;
  934. setbase : aint;
  935. bitsperop,l : longint;
  936. cgop : topcg;
  937. asmop : tasmop;
  938. opdef : tdef;
  939. opsize,
  940. orgsize: tcgsize;
  941. begin
  942. {$ifdef i8086}
  943. { BTS and BTR are 386+ }
  944. if current_settings.cputype < cpu_386 then
  945. begin
  946. inherited;
  947. exit;
  948. end;
  949. {$endif i8086}
  950. if is_smallset(tcallparanode(left).resultdef) then
  951. begin
  952. opdef:=tcallparanode(left).resultdef;
  953. opsize:=int_cgsize(opdef.size)
  954. end
  955. else
  956. begin
  957. opdef:=u32inttype;
  958. opsize:=OS_32;
  959. end;
  960. bitsperop:=(8*tcgsize2size[opsize]);
  961. secondpass(tcallparanode(left).left);
  962. secondpass(tcallparanode(tcallparanode(left).right).left);
  963. setbase:=tsetdef(tcallparanode(left).left.resultdef).setbase;
  964. if tcallparanode(tcallparanode(left).right).left.location.loc=LOC_CONSTANT then
  965. begin
  966. { calculate bit position }
  967. l:=1 shl ((tcallparanode(tcallparanode(left).right).left.location.value-setbase) mod bitsperop);
  968. { determine operator }
  969. if inlinenumber=in_include_x_y then
  970. cgop:=OP_OR
  971. else
  972. begin
  973. cgop:=OP_AND;
  974. l:=not(l);
  975. end;
  976. case tcallparanode(left).left.location.loc of
  977. LOC_REFERENCE :
  978. begin
  979. inc(tcallparanode(left).left.location.reference.offset,
  980. ((tcallparanode(tcallparanode(left).right).left.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
  981. cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,l,tcallparanode(left).left.location.reference);
  982. end;
  983. LOC_CREGISTER :
  984. cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,tcallparanode(left).left.location.size,l,tcallparanode(left).left.location.register);
  985. else
  986. internalerror(200405022);
  987. end;
  988. end
  989. else
  990. begin
  991. orgsize:=opsize;
  992. if opsize in [OS_8,OS_S8] then
  993. begin
  994. opdef:=u32inttype;
  995. opsize:=OS_32;
  996. end;
  997. { determine asm operator }
  998. if inlinenumber=in_include_x_y then
  999. asmop:=A_BTS
  1000. else
  1001. asmop:=A_BTR;
  1002. hlcg.location_force_reg(current_asmdata.CurrAsmList,tcallparanode(tcallparanode(left).right).left.location,tcallparanode(tcallparanode(left).right).left.resultdef,opdef,true);
  1003. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,tcallparanode(tcallparanode(left).right).left.resultdef,tcallparanode(tcallparanode(left).right).left.location,setbase);
  1004. hregister:=tcallparanode(tcallparanode(left).right).left.location.register;
  1005. if (tcallparanode(left).left.location.loc=LOC_REFERENCE) then
  1006. emit_reg_ref(asmop,tcgsize2opsize[opsize],hregister,tcallparanode(left).left.location.reference)
  1007. else
  1008. begin
  1009. { second argument can't be an 8 bit register either }
  1010. hregister2:=tcallparanode(left).left.location.register;
  1011. if (orgsize in [OS_8,OS_S8]) then
  1012. hregister2:=cg.makeregsize(current_asmdata.CurrAsmList,hregister2,opsize);
  1013. emit_reg_reg(asmop,tcgsize2opsize[opsize],hregister,hregister2);
  1014. end;
  1015. end;
  1016. end;
  1017. procedure tx86inlinenode.second_popcnt;
  1018. var
  1019. opsize: tcgsize;
  1020. begin
  1021. secondpass(left);
  1022. opsize:=tcgsize2unsigned[left.location.size];
  1023. { no 8 Bit popcont }
  1024. if opsize=OS_8 then
  1025. opsize:=OS_16;
  1026. if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) or
  1027. (left.location.size<>opsize) then
  1028. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
  1029. location_reset(location,LOC_REGISTER,opsize);
  1030. location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  1031. if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  1032. emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register)
  1033. else
  1034. emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
  1035. if resultdef.size=1 then
  1036. begin
  1037. location.size:=OS_8;
  1038. location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
  1039. end;
  1040. end;
  1041. procedure tx86inlinenode.second_fma;
  1042. const
  1043. op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
  1044. (
  1045. { positive product }
  1046. (
  1047. { positive third operand }
  1048. ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
  1049. (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
  1050. ),
  1051. { negative third operand }
  1052. ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
  1053. (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
  1054. )
  1055. ),
  1056. { negative product }
  1057. (
  1058. { positive third operand }
  1059. ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
  1060. (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
  1061. ),
  1062. { negative third operand }
  1063. ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
  1064. (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
  1065. )
  1066. )
  1067. );
  1068. var
  1069. paraarray : array[1..3] of tnode;
  1070. memop,
  1071. i : integer;
  1072. negop3,
  1073. negproduct,
  1074. gotmem : boolean;
  1075. begin
  1076. {$ifndef i8086}
  1077. if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
  1078. begin
  1079. negop3:=false;
  1080. negproduct:=false;
  1081. paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
  1082. paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1083. paraarray[3]:=tcallparanode(parameters).paravalue;
  1084. { check if a neg. node can be removed
  1085. this is possible because changing the sign of
  1086. a floating point number does not affect its absolute
  1087. value in any way
  1088. }
  1089. if paraarray[1].nodetype=unaryminusn then
  1090. begin
  1091. paraarray[1]:=tunarynode(paraarray[1]).left;
  1092. { do not release the unused unary minus node, it is kept and release together with the other nodes,
  1093. only no code is generated for it }
  1094. negproduct:=not(negproduct);
  1095. end;
  1096. if paraarray[2].nodetype=unaryminusn then
  1097. begin
  1098. paraarray[2]:=tunarynode(paraarray[2]).left;
  1099. { do not release the unused unary minus node, it is kept and release together with the other nodes,
  1100. only no code is generated for it }
  1101. negproduct:=not(negproduct);
  1102. end;
  1103. if paraarray[3].nodetype=unaryminusn then
  1104. begin
  1105. paraarray[3]:=tunarynode(paraarray[3]).left;
  1106. { do not release the unused unary minus node, it is kept and release together with the other nodes,
  1107. only no code is generated for it }
  1108. negop3:=true;
  1109. end;
  1110. for i:=1 to 3 do
  1111. secondpass(paraarray[i]);
  1112. { only one memory operand is allowed }
  1113. gotmem:=false;
  1114. memop:=0;
  1115. for i:=1 to 3 do
  1116. begin
  1117. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1118. begin
  1119. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1120. begin
  1121. memop:=i;
  1122. gotmem:=true;
  1123. end
  1124. else
  1125. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1126. end;
  1127. end;
  1128. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1129. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1130. if gotmem then
  1131. begin
  1132. case memop of
  1133. 1:
  1134. begin
  1135. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1136. paraarray[3].location.register,location.register,mms_movescalar);
  1137. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1138. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1139. end;
  1140. 2:
  1141. begin
  1142. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1143. paraarray[3].location.register,location.register,mms_movescalar);
  1144. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1145. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1146. end;
  1147. 3:
  1148. begin
  1149. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1150. paraarray[1].location.register,location.register,mms_movescalar);
  1151. emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
  1152. paraarray[3].location.reference,paraarray[2].location.register,location.register);
  1153. end
  1154. else
  1155. internalerror(2014041301);
  1156. end;
  1157. end
  1158. else
  1159. begin
  1160. { try to use the location which is already in a temp. mm register as destination,
  1161. so the compiler might be able to re-use the register }
  1162. if paraarray[1].location.loc=LOC_MMREGISTER then
  1163. begin
  1164. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1165. paraarray[1].location.register,location.register,mms_movescalar);
  1166. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
  1167. paraarray[3].location.register,paraarray[2].location.register,location.register);
  1168. end
  1169. else if paraarray[2].location.loc=LOC_MMREGISTER then
  1170. begin
  1171. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1172. paraarray[2].location.register,location.register,mms_movescalar);
  1173. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
  1174. paraarray[3].location.register,paraarray[1].location.register,location.register);
  1175. end
  1176. else
  1177. begin
  1178. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
  1179. paraarray[3].location.register,location.register,mms_movescalar);
  1180. emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
  1181. paraarray[1].location.register,paraarray[2].location.register,location.register);
  1182. end;
  1183. end;
  1184. end
  1185. else
  1186. {$endif i8086}
  1187. internalerror(2014032301);
  1188. end;
  1189. procedure tx86inlinenode.second_frac_real;
  1190. var
  1191. extrareg : TRegister;
  1192. begin
  1193. if use_vectorfpu(resultdef) then
  1194. begin
  1195. secondpass(left);
  1196. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1197. location_reset(location,LOC_MMREGISTER,left.location.size);
  1198. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1199. if UseAVX then
  1200. case tfloatdef(resultdef).floattype of
  1201. s32real:
  1202. begin
  1203. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1204. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
  1205. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
  1206. end;
  1207. s64real:
  1208. begin
  1209. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1210. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
  1211. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
  1212. end;
  1213. else
  1214. internalerror(2017052102);
  1215. end
  1216. else
  1217. begin
  1218. extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1219. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  1220. case tfloatdef(resultdef).floattype of
  1221. s32real:
  1222. begin
  1223. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,extrareg));
  1224. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
  1225. end;
  1226. s64real:
  1227. begin
  1228. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,extrareg));
  1229. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
  1230. end;
  1231. else
  1232. internalerror(2017052103);
  1233. end;
  1234. end;
  1235. end
  1236. else
  1237. internalerror(2017052101);
  1238. end;
  1239. procedure tx86inlinenode.second_int_real;
  1240. begin
  1241. if use_vectorfpu(resultdef) then
  1242. begin
  1243. secondpass(left);
  1244. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1245. location_reset(location,LOC_MMREGISTER,left.location.size);
  1246. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1247. if UseAVX then
  1248. case tfloatdef(resultdef).floattype of
  1249. s32real:
  1250. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1251. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
  1252. s64real:
  1253. { using left.location.register here as 3rd parameter is crucial to break dependency chains }
  1254. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
  1255. else
  1256. internalerror(2017052105);
  1257. end
  1258. else
  1259. begin
  1260. case tfloatdef(resultdef).floattype of
  1261. s32real:
  1262. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,location.register));
  1263. s64real:
  1264. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,location.register));
  1265. else
  1266. internalerror(2017052106);
  1267. end;
  1268. end;
  1269. end
  1270. else
  1271. internalerror(2017052107);
  1272. end;
  1273. procedure tx86inlinenode.second_high;
  1274. var
  1275. donelab: tasmlabel;
  1276. hregister : tregister;
  1277. href : treference;
  1278. begin
  1279. secondpass(left);
  1280. if not(is_dynamic_array(left.resultdef)) then
  1281. Internalerror(2019122809);
  1282. { length in dynamic arrays is at offset -sizeof(pint) }
  1283. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
  1284. current_asmdata.getjumplabel(donelab);
  1285. { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
  1286. flag is set in this case, so we can jump depending on it
  1287. when loading the actual high value, we have to take care later of the decreased value
  1288. do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
  1289. we are using is not working as dec does not touch the carry flag }
  1290. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
  1291. { volatility of the dyn. array refers to the volatility of the
  1292. string pointer, not of the string data }
  1293. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,donelab);
  1294. hlcg.reference_reset_base(href,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
  1295. { if the string pointer is nil, the length is 0 -> reuse the register
  1296. that originally held the string pointer for the length, so that we
  1297. can keep the original nil/0 as length in that case }
  1298. hregister:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
  1299. hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,href,hregister);
  1300. cg.a_label(current_asmdata.CurrAsmList,donelab);
  1301. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  1302. location.register:=hregister;
  1303. end;
  1304. procedure tx86inlinenode.second_minmax;
  1305. const
  1306. oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
  1307. (
  1308. (
  1309. (A_MINSS,A_MINSD),
  1310. (A_VMINSS,A_VMINSD)
  1311. ),
  1312. (
  1313. (A_MAXSS,A_MAXSD),
  1314. (A_VMAXSS,A_VMAXSD)
  1315. )
  1316. );
  1317. var
  1318. paraarray : array[1..2] of tnode;
  1319. memop,
  1320. i : integer;
  1321. gotmem : boolean;
  1322. op: TAsmOp;
  1323. begin
  1324. {$ifndef i8086}
  1325. if
  1326. {$ifdef i386}
  1327. ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
  1328. ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
  1329. {$else i386}
  1330. is_single(resultdef) or is_double(resultdef)
  1331. {$endif i386}
  1332. then
  1333. begin
  1334. paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1335. paraarray[2]:=tcallparanode(parameters).paravalue;
  1336. for i:=low(paraarray) to high(paraarray) do
  1337. secondpass(paraarray[i]);
  1338. { only one memory operand is allowed }
  1339. gotmem:=false;
  1340. memop:=0;
  1341. for i:=low(paraarray) to high(paraarray) do
  1342. begin
  1343. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1344. begin
  1345. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1346. begin
  1347. memop:=i;
  1348. gotmem:=true;
  1349. end
  1350. else
  1351. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1352. end;
  1353. end;
  1354. { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
  1355. one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
  1356. if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
  1357. begin
  1358. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
  1359. gotmem:=false;
  1360. end;
  1361. op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
  1362. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1363. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1364. if gotmem then
  1365. begin
  1366. if UseAVX then
  1367. case memop of
  1368. 1:
  1369. emit_ref_reg_reg(op,S_NO,
  1370. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1371. 2:
  1372. emit_ref_reg_reg(op,S_NO,
  1373. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1374. else
  1375. internalerror(2020120504);
  1376. end
  1377. else
  1378. case memop of
  1379. 1:
  1380. begin
  1381. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1382. paraarray[2].location.register,location.register,mms_movescalar);
  1383. emit_ref_reg(op,S_NO,
  1384. paraarray[1].location.reference,location.register);
  1385. end;
  1386. 2:
  1387. begin
  1388. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1389. paraarray[1].location.register,location.register,mms_movescalar);
  1390. emit_ref_reg(op,S_NO,
  1391. paraarray[2].location.reference,location.register);
  1392. end;
  1393. else
  1394. internalerror(2020120601);
  1395. end;
  1396. end
  1397. else
  1398. begin
  1399. if UseAVX then
  1400. emit_reg_reg_reg(op,S_NO,
  1401. paraarray[2].location.register,paraarray[1].location.register,location.register)
  1402. else
  1403. begin
  1404. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1405. paraarray[1].location.register,location.register,mms_movescalar);
  1406. emit_reg_reg(op,S_NO,
  1407. paraarray[2].location.register,location.register)
  1408. end;
  1409. end;
  1410. end
  1411. else
  1412. {$endif i8086}
  1413. internalerror(2020120503);
  1414. end;
  1415. end.