nx86inl.pas 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 specific inline node: overrides first pass hooks and second pass
    code generation of tcginlinenode for the intrinsics listed below }
  tx86inlinenode = class(tcginlinenode)
    { first pass overrides, required so that the code generator
      actually gets to generate these nodes itself }
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;

    { second pass overrides, these emit the code for the nodes }
    procedure second_IncludeExclude; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
{$ifndef i8086}
    { not compiled for the i8086 target }
    procedure second_abs_long; override;
{$endif not i8086}
    procedure second_popcnt; override;
    procedure second_fma; override;
  private
    { runs the second pass on lnode and moves its value into an x87 register }
    procedure load_fpu_location(lnode: tnode);
  end;
  61. implementation
  62. uses
  63. systems,
  64. globtype,globals,
  65. cutils,verbose,
  66. symconst,
  67. defutil,
  68. aasmbase,aasmtai,aasmdata,aasmcpu,
  69. symtype,symdef,symcpu,
  70. cgbase,pass_2,
  71. cpuinfo,cpubase,paramgr,
  72. nbas,ncon,ncal,ncnv,nld,ncgutil,
  73. tgobj,
  74. cga,cgutils,cgx86,cgobj,hlcgobj;
  75. {*****************************************************************************
  76. TX86INLINENODE
  77. *****************************************************************************}
  { First pass for pi: handled inline (expected in an FPU register) only when
    the best real type is the 80 bit extended type, otherwise the inherited
    implementation decides. }
  function tx86inlinenode.first_pi : tnode;
  begin
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        result:=inherited first_pi;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    result:=nil;
  end;
  { First pass for arctan: handled inline (expected in an FPU register) only
    when the best real type is s80real; all other cases go to the inherited
    implementation. }
  function tx86inlinenode.first_arctan_real : tnode;
  begin
{$ifdef i8086}
    { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
      so the RTL helper has to be used on these FPUs }
    if current_settings.cputype<cpu_386 then
      begin
        result:=inherited first_arctan_real;
        exit;
      end;
{$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        result:=inherited first_arctan_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    result:=nil;
  end;
  { First pass for abs() on reals: always handled inline. The expected
    location is an MM register when use_vectorfpu(resultdef) holds and an
    FPU register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
  begin
    result:=nil;
    if not use_vectorfpu(resultdef) then
      expectloc:=LOC_FPUREGISTER
    else
      expectloc:=LOC_MMREGISTER;
  end;
  { First pass for sqr() on reals: always handled inline. The expected
    location is an MM register when use_vectorfpu(resultdef) holds and an
    FPU register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
  begin
    result:=nil;
    if not use_vectorfpu(resultdef) then
      expectloc:=LOC_FPUREGISTER
    else
      expectloc:=LOC_MMREGISTER;
  end;
  { First pass for sqrt() on reals: always handled inline. The expected
    location is an MM register when use_vectorfpu(resultdef) holds and an
    FPU register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
  begin
    result:=nil;
    if not use_vectorfpu(resultdef) then
      expectloc:=LOC_FPUREGISTER
    else
      expectloc:=LOC_MMREGISTER;
  end;
  { First pass for ln(): handled inline (expected in an FPU register) only
    when the best real type is s80real, otherwise the inherited
    implementation decides. }
  function tx86inlinenode.first_ln_real : tnode;
  begin
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        result:=inherited first_ln_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    result:=nil;
  end;
  { First pass for cos(): handled inline (expected in an FPU register) only
    when the best real type is s80real; all other cases go to the inherited
    implementation. }
  function tx86inlinenode.first_cos_real : tnode;
  begin
{$ifdef i8086}
    { FCOS is 387+ }
    if current_settings.cputype<cpu_386 then
      begin
        result:=inherited first_cos_real;
        exit;
      end;
{$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        result:=inherited first_cos_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    result:=nil;
  end;
  { First pass for sin(): handled inline (expected in an FPU register) only
    when the best real type is s80real; all other cases go to the inherited
    implementation. }
  function tx86inlinenode.first_sin_real : tnode;
  begin
{$ifdef i8086}
    { FSIN is 387+ }
    if current_settings.cputype<cpu_386 then
      begin
        result:=inherited first_sin_real;
        exit;
      end;
{$endif i8086}
    if tfloatdef(pbestrealtype^).floattype<>s80real then
      begin
        result:=inherited first_sin_real;
        exit;
      end;
    expectloc:=LOC_FPUREGISTER;
    result:=nil;
  end;
  { First pass for round(): always handled inline. The result is expected
    in memory, except on x86_64 when the operand type satisfies
    use_vectorfpu, in which case it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
  begin
    result:=nil;
    expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      expectloc:=LOC_REGISTER;
{$endif x86_64}
  end;
  { First pass for trunc(): when optimizing for size (and, on x86_64, the
    operand type does not satisfy use_vectorfpu) the inherited implementation
    is used. Otherwise the node is handled inline with the same expected
    locations as round(). }
  function tx86inlinenode.first_trunc_real: tnode;
  begin
    if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
       and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
    then
      begin
        result:=inherited first_trunc_real;
        exit;
      end;

    result:=nil;
    expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      expectloc:=LOC_REGISTER;
{$endif x86_64}
  end;
  { First pass for popcnt: handled inline (result expected in a register)
    when the selected cpu type has the POPCNT capability; on i386 64 bit
    operands are excluded. Everything else, including every i8086 build,
    goes to the inherited implementation. }
  function tx86inlinenode.first_popcnt: tnode;
  begin
{$ifndef i8086}
    if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
       and not is_64bit(left.resultdef)
{$endif i386}
    then
      begin
        expectloc:=LOC_REGISTER;
        result:=nil;
        exit;
      end;
{$endif not i8086}
    result:=inherited first_popcnt;
  end;
  { First pass for fma: handled inline (result expected in an MM register)
    when the selected cpu type reports CPUX86_HAS_FMA or CPUX86_HAS_FMA4 and
    the result type is single or double; otherwise the inherited
    implementation is used. }
  function tx86inlinenode.first_fma : tnode;
  begin
    if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])=[]) or
       not(is_double(resultdef) or is_single(resultdef)) then
      begin
        result:=inherited first_fma;
        exit;
      end;
    expectloc:=LOC_MMREGISTER;
    result:=nil;
  end;
  { Emits FLDPI and reports the result in the x87 result register }
  procedure tx86inlinenode.second_pi;
  begin
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_FPU_RESULT_REG;
    { the load pushes one value, so the code generator's fpu stack
      bookkeeping is bumped as well }
    emit_none(A_FLDPI,S_NO);
    tcgx86(cg).inc_fpu_stack;
  end;
  { Runs the second pass on lnode and loads its value into an fpu register.
    The node's own location is set up as LOC_FPUREGISTER using
    NR_FPU_RESULT_REG before lnode is evaluated. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
  begin
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_FPU_RESULT_REG;
    secondpass(lnode);
    case lnode.location.loc of
      LOC_MMREGISTER,
      LOC_CMMREGISTER:
        begin
          { take over the operand's location and let the high level code
            generator turn it into an fpu register }
          location:=lnode.location;
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
        end;
      LOC_REFERENCE,
      LOC_CREFERENCE:
        cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
          lnode.location.size,lnode.location.size,
          lnode.location.reference,location.register);
      LOC_CFPUREGISTER:
        cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,lnode.location.size,
          lnode.location.size,lnode.location.register,location.register);
      LOC_FPUREGISTER:
        { no code is emitted for this case }
        ;
      else
        internalerror(309991);
    end;
  end;
  { arctan(left): loads the operand into the fpu, then emits FLD1
    followed by FPATAN }
  procedure tx86inlinenode.second_arctan_real;
  begin
    load_fpu_location(left);
    { FLD1 supplies the second FPATAN operand }
    emit_none(A_FLD1,S_NO);
    emit_none(A_FPATAN,S_NO);
  end;
  { abs() on reals. Without vector fpu support the operand is loaded into
    the fpu and FABS is emitted. Otherwise the operand is forced into an MM
    register and ANDed with the constant referenced by the symbol
    FPC_ABSMASK_SINGLE or FPC_ABSMASK_DOUBLE (prefixed with
    target_info.cprefix). }
  procedure tx86inlinenode.second_abs_real;
  var
    maskref : treference;
    maskname : string;
    avxop,
    sseop : tasmop;
  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_none(A_FABS,S_NO);
        exit;
      end;

    secondpass(left);
    if left.location.loc<>LOC_MMREGISTER then
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);

    { the three operand AVX form writes to a fresh register, the two
      operand SSE form works in place on the operand's register }
    if UseAVX then
      begin
        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
      end
    else
      location:=left.location;

    { select mask symbol and opcodes for the float type }
    maskname:='';
    avxop:=A_NONE;
    sseop:=A_NONE;
    case tfloatdef(resultdef).floattype of
      s32real:
        begin
          maskname:='FPC_ABSMASK_SINGLE';
          avxop:=A_VANDPS;
          sseop:=A_ANDPS;
        end;
      s64real:
        begin
          maskname:='FPC_ABSMASK_DOUBLE';
          avxop:=A_VANDPD;
          sseop:=A_ANDPD;
        end;
      else
        internalerror(200506081);
    end;

    reference_reset_symbol(maskref,current_asmdata.RefAsmSymbol(target_info.cprefix+maskname),0,4);
    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,maskref);
    if UseAVX then
      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
        avxop,S_XMM,maskref,left.location.register,location.register))
    else
      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,maskref,location.register));
  end;
  { round(). On x86_64 with a vector fpu operand a (V)CVTSS2SI/(V)CVTSD2SI
    into a 64 bit integer register is emitted. In all other cases the
    operand is loaded into the fpu and stored with FISTP into a temp. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
  var
    cvtop : tasmop;
{$endif x86_64}
  begin
{$ifdef x86_64}
    if use_vectorfpu(left.resultdef) then
      begin
        secondpass(left);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
        location_reset(location,LOC_REGISTER,OS_S64);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
        { pick the conversion opcode from operand size and AVX usage }
        cvtop:=A_NONE;
        case left.location.size of
          OS_F32:
            begin
              if UseAVX then
                cvtop:=A_VCVTSS2SI
              else
                cvtop:=A_CVTSS2SI;
            end;
          OS_F64:
            begin
              if UseAVX then
                cvtop:=A_VCVTSD2SI
              else
                cvtop:=A_CVTSD2SI;
            end;
          else
            internalerror(2007031402);
        end;
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_Q,left.location.register,location.register));
        exit;
      end;
{$endif x86_64}
    { x87 path: the result goes to a temp in memory, FISTP pops the stack }
    load_fpu_location(left);
    location_reset_ref(location,LOC_REFERENCE,OS_S64,0);
    tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
    emit_ref(A_FISTP,S_IQ,location.reference);
    tcgx86(cg).dec_fpu_stack;
    emit_none(A_FWAIT,S_NO);
  end;
  { trunc(). Three strategies are used:
      - x86_64 with a vector fpu operand: (V)CVTTSS2SI/(V)CVTTSD2SI into a
        64 bit integer register
      - fputype >= fpu_sse3: load into the fpu and store with FISTTP
      - otherwise: save the fpu control word, OR $0f00 into a copy, load
        that copy, store with FISTP and restore the saved control word }
  procedure tx86inlinenode.second_trunc_real;
  var
    oldcw,newcw : treference;
{$ifdef x86_64}
    cvtop : tasmop;
{$endif x86_64}
  begin
{$ifdef x86_64}
    { NOTE(review): left.location.loc is inspected here although
      secondpass(left) has not been called yet in this procedure -
      confirm that this is intended }
    if use_vectorfpu(left.resultdef) and
       not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
      begin
        secondpass(left);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
        location_reset(location,LOC_REGISTER,OS_S64);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
        { pick the truncating conversion opcode }
        cvtop:=A_NONE;
        case left.location.size of
          OS_F32:
            begin
              if UseAVX then
                cvtop:=A_VCVTTSS2SI
              else
                cvtop:=A_CVTTSS2SI;
            end;
          OS_F64:
            begin
              if UseAVX then
                cvtop:=A_VCVTTSD2SI
              else
                cvtop:=A_CVTTSD2SI;
            end;
          else
            internalerror(2007031401);
        end;
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_Q,left.location.register,location.register));
        exit;
      end;
{$endif x86_64}

    if current_settings.fputype<fpu_sse3 then
      begin
        { two 16 bit temps: the saved control word and the modified copy }
        tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
        tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
{$ifdef i8086}
        if current_settings.cputype<=cpu_286 then
          begin
            emit_ref(A_FSTCW,S_NO,newcw);
            emit_ref(A_FSTCW,S_NO,oldcw);
            emit_none(A_FWAIT,S_NO);
          end
        else
{$endif i8086}
          begin
            emit_ref(A_FNSTCW,S_NO,newcw);
            emit_ref(A_FNSTCW,S_NO,oldcw);
          end;
        emit_const_ref(A_OR,S_W,$0f00,newcw);
        load_fpu_location(left);
        emit_ref(A_FLDCW,S_NO,newcw);
        location_reset_ref(location,LOC_REFERENCE,OS_S64,0);
        tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
        emit_ref(A_FISTP,S_IQ,location.reference);
        tcgx86(cg).dec_fpu_stack;
        { back to the saved control word }
        emit_ref(A_FLDCW,S_NO,oldcw);
        emit_none(A_FWAIT,S_NO);
        tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
        tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
      end
    else
      begin
        { FISTTP path, the control word is left untouched }
        load_fpu_location(left);
        location_reset_ref(location,LOC_REFERENCE,OS_S64,0);
        tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
        emit_ref(A_FISTTP,S_IQ,location.reference);
        tcgx86(cg).dec_fpu_stack;
      end;
  end;
  { sqr(): multiplies the operand with itself, either in an MM register
    (when use_vectorfpu(resultdef) holds) or on the x87 stack }
  procedure tx86inlinenode.second_sqr_real;
  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
        exit;
      end;

    secondpass(left);
    location_reset(location,LOC_MMREGISTER,left.location.size);
    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
    if not UseAVX then
      begin
        { values living in an fpu register are moved to an MM register
          first; the operand is then copied to the result register and
          squared in place }
        if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
        cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
      end
    else
      begin
        { three operand form: result := left * left }
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
      end;
  end;
  { sqrt(): emits (V)SQRTSS/(V)SQRTSD on an MM register when
    use_vectorfpu(resultdef) holds, FSQRT on the x87 stack otherwise }
  procedure tx86inlinenode.second_sqrt_real;
  var
    sqrtop : tasmop;
  begin
    if not use_vectorfpu(resultdef) then
      begin
        load_fpu_location(left);
        emit_none(A_FSQRT,S_NO);
        exit;
      end;

    secondpass(left);
    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
    location_reset(location,LOC_MMREGISTER,left.location.size);
    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

    sqrtop:=A_NONE;
    if UseAVX then
      begin
        case tfloatdef(resultdef).floattype of
          s32real:
            sqrtop:=A_VSQRTSS;
          s64real:
            sqrtop:=A_VSQRTSD;
          else
            internalerror(200510031);
        end;
        { the AVX form takes the result register twice }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrtop,S_XMM,left.location.register,location.register,location.register));
      end
    else
      begin
        case tfloatdef(resultdef).floattype of
          s32real:
            sqrtop:=A_SQRTSS;
          s64real:
            sqrtop:=A_SQRTSD;
          else
            internalerror(200510031);
        end;
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrtop,S_XMM,left.location.register,location.register));
      end;
  end;
  { ln(left): loads the operand into the fpu, then emits FLDLN2, FXCH
    and FYL2X }
  procedure tx86inlinenode.second_ln_real;
  begin
    load_fpu_location(left);
    { FLDLN2 pushes the constant, FXCH swaps it with the operand
      before FYL2X is executed }
    emit_none(A_FLDLN2,S_NO);
    emit_none(A_FXCH,S_NO);
    emit_none(A_FYL2X,S_NO);
  end;
  { cos(left): loads the operand into the fpu and emits FCOS. On i8086
    builds targeting a cpu below the 386 the inherited code is used. }
  procedure tx86inlinenode.second_cos_real;
  begin
{$ifdef i8086}
    { FCOS is 387+ }
    if current_settings.cputype<cpu_386 then
      begin
        inherited second_cos_real;
        exit;
      end;
{$endif i8086}
    load_fpu_location(left);
    emit_none(A_FCOS,S_NO);
  end;
  { sin(left): loads the operand into the fpu and emits FSIN. On i8086
    builds targeting a cpu below the 386 the inherited code is used. }
  procedure tx86inlinenode.second_sin_real;
  begin
{$ifdef i8086}
    { FSIN is 387+ }
    if current_settings.cputype<cpu_386 then
      begin
        inherited second_sin_real;
        exit;
      end;
{$endif i8086}
    load_fpu_location(left);
    emit_none(A_FSIN,S_NO);
  end;
  { prefetch: emits PREFETCHNTA on the address of the operand, which must
    be a memory reference. On i386 and i8086 nothing at all is done when
    the selected cpu type is below cpu_Pentium3. }
  procedure tx86inlinenode.second_prefetch;
  var
    addrref : treference;
    addrreg : tregister;
  begin
{$if defined(i386) or defined(i8086)}
    if current_settings.cputype<cpu_Pentium3 then
      exit;
{$endif i386 or i8086}
    secondpass(left);
    if left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
      begin
        { compute the address into a register and prefetch through it }
        addrreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,addrreg);
        reference_reset_base(addrref,addrreg,0,left.location.reference.alignment);
        current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,addrref));
      end
    else
      internalerror(200402021);
  end;
  548. {$ifndef i8086}
  549. procedure tx86inlinenode.second_abs_long;
  550. var
  551. hregister : tregister;
  552. opsize : tcgsize;
  553. hp : taicpu;
  554. begin
  555. {$ifdef i386}
  556. if current_settings.cputype<cpu_Pentium2 then
  557. begin
  558. opsize:=def_cgsize(left.resultdef);
  559. secondpass(left);
  560. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
  561. location:=left.location;
  562. location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  563. emit_reg_reg(A_MOV,S_L,left.location.register,location.register);
  564. emit_const_reg(A_SAR,tcgsize2opsize[opsize],31,left.location.register);
  565. emit_reg_reg(A_XOR,S_L,left.location.register,location.register);
  566. emit_reg_reg(A_SUB,S_L,left.location.register,location.register);
  567. end
  568. else
  569. {$endif i386}
  570. begin
  571. opsize:=def_cgsize(left.resultdef);
  572. secondpass(left);
  573. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
  574. hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  575. location:=left.location;
  576. location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  577. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,hregister);
  578. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
  579. emit_reg(A_NEG,tcgsize2opsize[opsize],hregister);
  580. hp:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],hregister,location.register);
  581. hp.condition:=C_NS;
  582. current_asmdata.CurrAsmList.concat(hp);
  583. end;
  584. end;
  585. {$endif not i8086}
  586. {*****************************************************************************
  587. INCLUDE/EXCLUDE GENERIC HANDLING
  588. *****************************************************************************}
  { include()/exclude(): the first call parameter is the set, the second
    one the element. A constant element is handled with an OR (include) or
    AND with the inverted mask (exclude); a variable element is handled
    with BTS (include) or BTR (exclude). }
  procedure tx86inlinenode.second_IncludeExclude;
  var
    setnode,
    elemnode   : tnode;
    bitreg,
    setreg     : tregister;
    setbase    : aint;
    bitsperop,
    bitmask    : longint;
    cgop       : topcg;
    asmop      : tasmop;
    opdef      : tdef;
    opsize,
    orgsize    : tcgsize;
  begin
{$ifdef i8086}
    { BTS and BTR are 386+ }
    if current_settings.cputype<cpu_386 then
      begin
        inherited second_IncludeExclude;
        exit;
      end;
{$endif i8086}
    { shorthands for the two parameter expressions }
    setnode:=tcallparanode(left).left;
    elemnode:=tcallparanode(tcallparanode(left).right).left;

    { small sets are operated on in their own size, all others in
      32 bit units }
    if is_smallset(tcallparanode(left).resultdef) then
      begin
        opdef:=tcallparanode(left).resultdef;
        opsize:=int_cgsize(opdef.size);
      end
    else
      begin
        opdef:=u32inttype;
        opsize:=OS_32;
      end;
    bitsperop:=8*tcgsize2size[opsize];

    secondpass(setnode);
    secondpass(elemnode);
    setbase:=tsetdef(setnode.resultdef).setbase;

    if elemnode.location.loc<>LOC_CONSTANT then
      begin
        { variable element: bit test and set/reset instruction }
        orgsize:=opsize;
        if opsize in [OS_8,OS_S8] then
          begin
            opdef:=u32inttype;
            opsize:=OS_32;
          end;
        { determine asm operator }
        if inlinenumber=in_include_x_y then
          asmop:=A_BTS
        else
          asmop:=A_BTR;
        hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
        register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.location,setbase);
        bitreg:=elemnode.location.register;
        if setnode.location.loc<>LOC_REFERENCE then
          begin
            { second argument can't be an 8 bit register either }
            setreg:=setnode.location.register;
            if orgsize in [OS_8,OS_S8] then
              setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
            emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
          end
        else
          emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference);
      end
    else
      begin
        { constant element: calculate bit position }
        bitmask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
        { determine operator }
        if inlinenumber=in_include_x_y then
          cgop:=OP_OR
        else
          begin
            cgop:=OP_AND;
            bitmask:=not(bitmask);
          end;
        case setnode.location.loc of
          LOC_CREGISTER :
            cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,setnode.location.size,bitmask,setnode.location.register);
          LOC_REFERENCE :
            begin
              { advance the reference to the unit holding the bit }
              inc(setnode.location.reference.offset,
                ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
              cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,bitmask,setnode.location.reference);
            end;
          else
            internalerror(200405022);
        end;
      end;
  end;
  { popcnt: emits POPCNT from the operand (register or memory) into a
    freshly allocated integer register }
  procedure tx86inlinenode.second_popcnt;
  var
    opsize : tcgsize;
  begin
    secondpass(left);
    opsize:=tcgsize2unsigned[left.location.size];
    { no 8 bit popcnt, use the 16 bit form instead }
    if opsize=OS_8 then
      opsize:=OS_16;
    { the operand has to be a register or a memory location of exactly
      the operation size, otherwise it is forced into a register }
    if (left.location.size<>opsize) or
       not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);

    location_reset(location,LOC_REGISTER,opsize);
    location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
    case left.location.loc of
      LOC_REGISTER,
      LOC_CREGISTER:
        emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register);
      else
        emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
    end;
  end;
  { fma: evaluates the three parameters, keeps at most one of them as a
    memory operand and emits one scalar VFMADD instruction. The opcode is
    taken from the table below, indexed by float type and by the number of
    the parameter used as memory operand (0 = none). }
  procedure tx86inlinenode.second_fma;
  const
    op : array[s32real..s64real,0..3] of TAsmOp = ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
                                                   (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD));
  var
    paraarray : array[1..3] of tnode;
    memop,
    destsrc,
    regsrc,
    i : integer;
    gotmem : boolean;
  begin
    if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
      begin
        { paraarray is filled by walking the nextpara chain, the
          parameter at the head of the chain ends up in slot 3 }
        paraarray[3]:=tcallparanode(parameters).paravalue;
        paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
        paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;

        for i:=1 to 3 do
          secondpass(paraarray[i]);

        { only one memory operand is allowed }
        gotmem:=false;
        memop:=0;
        for i:=1 to 3 do
          begin
            if paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
              continue;
            if gotmem or not(paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true)
            else
              begin
                memop:=i;
                gotmem:=true;
              end;
          end;

        location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

        if not gotmem then
          begin
            { all operands in registers: the result register starts out
              as a copy of parameter 3 }
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
              paraarray[3].location.register,location.register,mms_movescalar);
            emit_reg_reg_reg(op[tfloatdef(resultdef).floattype,0],S_NO,
              paraarray[1].location.register,paraarray[2].location.register,location.register);
          end
        else
          begin
            { destsrc: parameter copied into the result register first
              regsrc : parameter used as the register source operand }
            destsrc:=3;
            regsrc:=2;
            case memop of
              1:
                begin
                  destsrc:=3;
                  regsrc:=2;
                end;
              2:
                begin
                  destsrc:=3;
                  regsrc:=1;
                end;
              3:
                begin
                  destsrc:=1;
                  regsrc:=2;
                end
              else
                internalerror(2014041301);
            end;
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[destsrc].resultdef,resultdef,
              paraarray[destsrc].location.register,location.register,mms_movescalar);
            emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
              paraarray[memop].location.reference,paraarray[regsrc].location.register,location.register);
          end;
      end
    else
      internalerror(2014032301);
  end;
  769. end.