nx86inl.pas 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { Inline node for the x86 family.  The first_* methods decide during the
    first pass whether an intrinsic is expanded by this code generator (they
    then set expectloc and return nil) or left to the inherited
    implementation; the second_* methods emit the actual instructions. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { called at the start of first_round_real and first_trunc_real; the
      implementation in this unit is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    { --- first pass overrides, so that the code generator will actually
          generate these nodes --- }
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;

    { --- second pass overrides emitting the code for these nodes --- }
    procedure second_IncludeExclude; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
{$ifndef i8086}
    { not available when compiling for i8086 }
    procedure second_abs_long; override;
{$endif not i8086}
    procedure second_popcnt; override;
    procedure second_fma; override;
  private
    { runs the second pass on lnode and brings its value into
      NR_FPU_RESULT_REG }
    procedure load_fpu_location(lnode: tnode);
  end;
  64. implementation
  65. uses
  66. systems,
  67. globtype,globals,
  68. verbose,compinnr,
  69. defutil,
  70. aasmbase,aasmdata,aasmcpu,
  71. symtype,symdef,symcpu,
  72. cgbase,pass_2,
  73. cpuinfo,cpubase,nutils,
  74. ncal,ncgutil,
  75. tgobj,
  76. cga,cgutils,cgx86,cgobj,hlcgobj;
  77. {*****************************************************************************
  78. TX86INLINENODE
  79. *****************************************************************************}
  { Hook invoked by first_round_real and first_trunc_real before they pick
    an expected location.  It is virtual so that a descendant can override
    it; here it intentionally does nothing. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { only makes a difference for x86_64 }
    end;
  { First pass for Pi: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type, otherwise the inherited
    implementation decides. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for ArcTan: expanded inline (FPU register result) when the
    best real type is s80real.  On i8086 targets with a pre-386 CPU the
    inherited implementation is always used. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype<cpu_386 then
        begin
          result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Abs on reals: always handled inline; the result is
    expected in an MM register when the vector FPU is used for resultdef,
    in an FPU register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for Sqr on reals: always handled inline; the result is
    expected in an MM register when the vector FPU is used for resultdef,
    in an FPU register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for Sqrt on reals: always handled inline; the result is
    expected in an MM register when the vector FPU is used for resultdef,
    in an FPU register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for Ln: handled inline (FPU register result) only when the
    best real type is s80real, otherwise the inherited implementation
    decides. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Cos: expanded inline (FPU register result) when the best
    real type is s80real.  On i8086 targets with a pre-386 CPU the inherited
    implementation is always used. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Sin: expanded inline (FPU register result) when the best
    real type is s80real.  On i8086 targets with a pre-386 CPU the inherited
    implementation is always used. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      result:=nil;
    end;
  { First pass for Round: always handled inline.  The result is expected in
    memory, except on x86_64 when the operand type uses the vector FPU, in
    which case it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      maybe_remove_round_trunc_typeconv;
      result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for Trunc.
    - x86_64 with a vector FPU operand: inline, result in an integer register
    - otherwise, when optimizing for size: inherited implementation
    - otherwise: inline, result in memory }
  function tx86inlinenode.first_trunc_real: tnode;
    var
      vector_operand : boolean;
    begin
      maybe_remove_round_trunc_typeconv;
{$ifdef x86_64}
      vector_operand:=use_vectorfpu(left.resultdef);
{$else x86_64}
      vector_operand:=false;
{$endif x86_64}
      if vector_operand then
        begin
          expectloc:=LOC_REGISTER;
          result:=nil;
        end
      else if cs_opt_size in current_settings.optimizerswitches then
        result:=inherited
      else
        begin
          expectloc:=LOC_REFERENCE;
          result:=nil;
        end;
    end;
  { First pass for PopCnt: handled inline (integer register result) when the
    selected CPU type has CPUX86_HAS_POPCNT and, on i386, the operand is not
    64 bit.  In every other case, and always on i8086, the inherited
    implementation is used. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
        then
        begin
          expectloc:=LOC_REGISTER;
          result:=nil;
          exit;
        end;
{$endif not i8086}
      result:=inherited first_popcnt;
    end;
  { First pass for FMA: handled inline (MM register result) when the
    selected CPU type has CPUX86_HAS_FMA or CPUX86_HAS_FMA4 and the result is
    a single or a double.  Otherwise, and always on i8086, the inherited
    implementation is used. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if (is_double(resultdef) or is_single(resultdef)) and
         ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) then
        begin
          expectloc:=LOC_MMREGISTER;
          result:=nil;
          exit;
        end;
{$endif not i8086}
      result:=inherited first_fma;
    end;
  { Second pass for Pi: emits FLDPI, records the extra FPU stack entry and
    reports the result in NR_FPU_RESULT_REG. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Runs the second pass on lnode and loads its value into an FPU register.
    The node's own location is set to LOC_FPUREGISTER / NR_FPU_RESULT_REG
    before lnode is processed.  Any location kind not listed below raises
    internalerror 309991. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_REFERENCE,
        LOC_CREFERENCE:
          { operand lives in memory: load it from there }
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_CFPUREGISTER:
          { copy from the constant FPU register into the result register }
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            { take over the MM location and let the high level code
              generator move it into an FPU register }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
          end;
        LOC_FPUREGISTER:
          { no load is emitted for this case }
          ;
        else
          internalerror(309991);
      end;
    end;
  { Second pass for ArcTan: loads the operand onto the FPU stack, pushes 1.0
    with FLD1 and emits FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { second FPATAN operand }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Second pass for Abs on reals.
    Without the vector FPU the operand is loaded onto the FPU stack and FABS
    is emitted.  With the vector FPU the operand is ANDed with the constant
    FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE (referenced by symbol); with AVX
    a fresh destination register is used, otherwise the operation is done in
    place on the operand's register.  Float types other than s32real and
    s64real raise internalerror 200506081. }
  procedure tx86inlinenode.second_abs_real;
    var
      maskref : treference;
      maskname : string;
      sseop,
      avxop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          { three operand form: result goes to a new register }
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        { two operand form: the operand register becomes the result }
        location:=left.location;

      { pick the mask constant and the instruction pair for the float type }
      maskname:='';
      sseop:=A_NONE;
      avxop:=A_NONE;
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            maskname:='FPC_ABSMASK_SINGLE';
            sseop:=A_ANDPS;
            avxop:=A_VANDPS;
          end;
        s64real:
          begin
            maskname:='FPC_ABSMASK_DOUBLE';
            sseop:=A_ANDPD;
            avxop:=A_VANDPD;
          end;
        else
          internalerror(200506081);
      end;

      reference_reset_symbol(maskref,current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA),0,4,[]);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,maskref);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avxop,S_XMM,maskref,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(
          sseop,S_XMM,maskref,location.register));
    end;
  { Second pass for Round.
    x86_64 with a vector FPU operand: the operand is forced into an MM
    register and converted into a fresh 64 bit integer register with
    (V)CVTSS2SI / (V)CVTSD2SI; operand sizes other than OS_F32 and OS_F64
    raise internalerror 2007031402.
    Otherwise: the operand is loaded onto the FPU stack and stored with
    FISTP into a temporary, followed by FWAIT. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvtop : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { choose the conversion instruction for the operand size }
          cvtop:=A_NONE;
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  cvtop:=A_VCVTSS2SI
                else
                  cvtop:=A_CVTSS2SI;
              end;
            OS_F64:
              begin
                if UseAVX then
                  cvtop:=A_VCVTSD2SI
                else
                  cvtop:=A_CVTSD2SI;
              end;
            else
              internalerror(2007031402);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      { FISTP pops the FPU stack, so the stack bookkeeping is adjusted }
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { Second pass for Trunc.  Three code paths:
    1. x86_64, vector FPU operand: convert with (V)CVTTSS2SI / (V)CVTTSD2SI
       into a fresh 64 bit integer register (internalerror 2007031401 for
       other operand sizes).
    2. fputype >= fpu_sse3: load onto the FPU stack and store with FISTTP
       into a temporary.
    3. otherwise: save the FPU control word, load a copy with $0f00 ORed in
       (the x87 precision and rounding control fields, i.e. round towards
       zero; see the Intel SDM), store with FISTP and restore the saved
       control word. }
  procedure tx86inlinenode.second_trunc_real;
    var
      savedcw,
      trunccw : treference;
{$ifdef x86_64}
      cvtop : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { NOTE(review): left.location is inspected here before secondpass(left)
        has been run on it in this method - confirm that this is intended }
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { choose the truncating conversion for the operand size }
          cvtop:=A_NONE;
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  cvtop:=A_VCVTTSS2SI
                else
                  cvtop:=A_CVTTSS2SI;
              end;
            OS_F64:
              begin
                if UseAVX then
                  cvtop:=A_VCVTTSD2SI
                else
                  cvtop:=A_CVTTSD2SI;
              end;
            else
              internalerror(2007031401);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}

      if current_settings.fputype>=fpu_sse3 then
        begin
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          exit;
        end;

      { two 2 byte temporaries: the saved control word and the modified copy }
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,savedcw);
      tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunccw);
{$ifdef i8086}
      if current_settings.cputype<=cpu_286 then
        begin
          emit_ref(A_FSTCW,S_NO,trunccw);
          emit_ref(A_FSTCW,S_NO,savedcw);
          emit_none(A_FWAIT,S_NO);
        end
      else
{$endif i8086}
        begin
          emit_ref(A_FNSTCW,S_NO,trunccw);
          emit_ref(A_FNSTCW,S_NO,savedcw);
        end;
      emit_const_ref(A_OR,S_W,$0f00,trunccw);
      load_fpu_location(left);
      { switch to the modified control word for the store ... }
      emit_ref(A_FLDCW,S_NO,trunccw);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      { ... and back to the saved one afterwards }
      emit_ref(A_FLDCW,S_NO,savedcw);
      emit_none(A_FWAIT,S_NO);
      tg.UnGetTemp(current_asmdata.CurrAsmList,savedcw);
      tg.UnGetTemp(current_asmdata.CurrAsmList,trunccw);
    end;
  { Second pass for Sqr on reals.
    Without the vector FPU: load the operand onto the FPU stack and emit
    FMUL ST0,ST0.
    With the vector FPU: allocate a result MM register and multiply the
    operand with itself - as one three operand multiply when AVX is used,
    otherwise by first loading the operand into the result register and
    multiplying that register with itself. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          { an operand in an FPU register is moved to an MM register first }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { Second pass for Sqrt on reals.
    Without the vector FPU: load the operand onto the FPU stack and emit
    FSQRT.
    With the vector FPU: force the operand into an MM register, allocate a
    result MM register and emit (V)SQRTSS / (V)SQRTSD depending on the float
    type; other float types raise internalerror 200510031. }
  procedure tx86inlinenode.second_sqrt_real;
    var
      sqrtop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      sqrtop:=A_NONE;
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            if UseAVX then
              sqrtop:=A_VSQRTSS
            else
              sqrtop:=A_SQRTSS;
          end;
        s64real:
          begin
            if UseAVX then
              sqrtop:=A_VSQRTSD
            else
              sqrtop:=A_SQRTSD;
          end;
        else
          internalerror(200510031);
      end;

      { we use S_NO instead of S_XMM here, regardless of the register size,
        as the size of the memory location is 32/64 bit }
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrtop,S_NO,left.location.register,location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrtop,S_NO,left.location.register,location.register));
    end;
  { Second pass for Ln: loads the operand onto the FPU stack, pushes the
    FLDLN2 constant, swaps the two top entries with FXCH and emits FYL2X. }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      emit_none(A_FLDLN2,S_NO);
      { bring the operand back to the top of the stack }
      emit_none(A_FXCH,S_NO);
      emit_none(A_FYL2X,S_NO);
    end;
  { Second pass for Cos: loads the operand onto the FPU stack and emits
    FCOS.  On i8086 targets with a pre-386 CPU the inherited implementation
    is used instead. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype<cpu_386 then
        inherited second_cos_real
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Second pass for Sin: loads the operand onto the FPU stack and emits
    FSIN.  On i8086 targets with a pre-386 CPU the inherited implementation
    is used instead. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype<cpu_386 then
        inherited second_sin_real
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Second pass for Prefetch: emits PREFETCHNTA for the address of left when
    left ends up in a memory location; for any other location nothing is
    emitted.  On i386/i8086 the whole expansion is skipped unless the
    selected CPU type is at least cpu_Pentium3.

    The cs_checkpointer local switch is cleared on the left subtree while
    its code is generated, so that no pointer check is emitted for an
    address that is only prefetched, and is set again afterwards. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { re-enable the switch that was cleared above; the original code
            passed false here again, which left pointer checking disabled
            on the subtree }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { compute the effective address into a register and prefetch
                  through a plain base-register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  {$ifndef i8086}
  { Second pass for Abs on integers (not compiled for i8086).
    i386 without CMOV: copy the operand, arithmetic-shift the operand right
    by 31, then XOR and SUB the shifted value from the copy.
    Otherwise: copy the operand twice, negate one copy and use CMOVcc with
    condition C_NS to select the negated copy. }
  procedure tx86inlinenode.second_abs_long;
    var
      negreg : tregister;
      opsize : tcgsize;
      cmov : taicpu;
    begin
      { common to both variants }
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$ifdef i386}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          { the operand register is modified below, hence no constant
            register is accepted (last parameter false) }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          emit_reg_reg(A_MOV,S_L,left.location.register,location.register);
          emit_const_reg(A_SAR,tcgsize2opsize[opsize],31,left.location.register);
          emit_reg_reg(A_XOR,S_L,left.location.register,location.register);
          emit_reg_reg(A_SUB,S_L,left.location.register,location.register);
          exit;
        end;
{$endif i386}
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      negreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negreg);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
      { NEG sets the flags that the conditional move below tests }
      emit_reg(A_NEG,tcgsize2opsize[opsize],negreg);
      cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negreg,location.register);
      cmov.condition:=C_NS;
      current_asmdata.CurrAsmList.concat(cmov);
    end;
{$endif not i8086}
  604. {*****************************************************************************
  605. INCLUDE/EXCLUDE GENERIC HANDLING
  606. *****************************************************************************}
  { Second pass for Include/Exclude.
    left is a parameter list: its first entry is the set, the next one the
    element.
    - constant element: the bit is set with OR or cleared with AND of the
      inverted mask, directly in memory or in the set's register
      (internalerror 200405022 for any other set location).
    - variable element: BTS (include) or BTR (exclude) is emitted; 8 bit
      operands are widened to 32 bit first.
    On i8086 targets with a pre-386 CPU the inherited implementation is
    used. }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      idxreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,
      mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype<cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { small sets are operated on in their own size, everything else in
        32 bit units }
      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size);
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;
      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance to the unit of the set that holds the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,setnode.location.size,mask,setnode.location.register);
            else
              internalerror(200405022);
          end;
        end
      else
        begin
          { remember the original size, the operation itself is never done
            on 8 bit operands }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          idxreg:=elemnode.location.register;
          if setnode.location.loc=LOC_REFERENCE then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],idxreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if orgsize in [OS_8,OS_S8] then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],idxreg,setreg);
            end;
        end;
    end;
  { Second pass for PopCnt: emits POPCNT from the operand (register or
    memory) into a fresh integer register.  The operation size is the
    unsigned variant of the operand size, with 8 bit widened to 16 bit. }
  procedure tx86inlinenode.second_popcnt;
    var
      cntsize : tcgsize;
    begin
      secondpass(left);
      cntsize:=tcgsize2unsigned[left.location.size];
      { no 8 Bit popcnt }
      if cntsize=OS_8 then
        cntsize:=OS_16;
      { the operand must be a register or memory location of exactly the
        operation size, otherwise it is loaded into a register of that size }
      if (left.location.size<>cntsize) or
         not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);
      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register);
      end;
    end;
  { Second pass for FMA.
    The three parameters are collected into paraarray[1..3] (the parameter
    list is walked in reverse, so paraarray[3] is the first list entry).
    Unary minus nodes on the parameters are folded into the instruction
    choice instead of being evaluated.  At most one parameter is used as a
    memory operand, all others are forced into MM registers.  One parameter
    is copied into the result register, then a single scalar FMA instruction
    from the op table combines it with the two others.
    Raises internalerror 2014032301 when the CPU type has neither
    CPUX86_HAS_FMA nor CPUX86_HAS_FMA4 (always on i8086). }
  procedure tx86inlinenode.second_fma;
    const
      { indexed by [negated product, negated third operand, float type,
        variant]; variants 0..2 hold the 231 form, variant 3 the 213 form }
      op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
         { positive product }
         (
          { positive third operand }
          ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
           (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
          ),
          { negative third operand }
          ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
           (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
          )
         ),
         { negative product }
         (
          { positive third operand }
          ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
           (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
          ),
          { negative third operand }
          ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
           (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
          )
         )
        );

    var
      paraarray : array[1..3] of tnode;
      memop,
      accidx,
      firstidx,
      secondidx,
      variant,
      i : integer;
      negop3,
      negproduct : boolean;
    begin
{$ifndef i8086}
      if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[3]:=tcallparanode(parameters).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the unused unary minus nodes are not released here, they are
            kept and released together with the other nodes, only no code
            is generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  negop3:=true
                else
                  { each negated factor flips the sign of the product }
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed: the first parameter found in
            memory is kept there (memop<>0), everything else that is not in
            an MM register is loaded into one }
          memop:=0;
          for i:=1 to 3 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if (memop=0) and (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  memop:=i
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if memop<>0 then
            begin
              case memop of
                1,2:
                  begin
                    { a factor is in memory: preload the third operand, the
                      other factor (index 3-memop) is the register operand }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                      paraarray[3].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[memop].location.reference,paraarray[3-memop].location.register,location.register);
                  end;
                3:
                  begin
                    { the third operand is in memory: preload the first
                      factor }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                      paraarray[1].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[3].location.reference,paraarray[2].location.register,location.register);
                  end
                else
                  internalerror(2014041301);
              end;
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=1;
                  firstidx:=3;
                  secondidx:=2;
                  variant:=3;
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=2;
                  firstidx:=3;
                  secondidx:=1;
                  variant:=3;
                end
              else
                begin
                  accidx:=3;
                  firstidx:=1;
                  secondidx:=2;
                  variant:=0;
                end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,variant],S_NO,
                paraarray[firstidx].location.register,paraarray[secondidx].location.register,location.register);
            end;
        end
      else
{$endif not i8086}
        internalerror(2014032301);
    end;
  861. end.