nx86add.pas 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Add/compare node shared by the x86 targets. The multiplication helper
    second_mul is declared abstract here and has to be supplied by a
    descendant class. }
  tx86addnode = class(tcgaddnode)
  protected
    { returns the condition flags that match nodetype, taking nf_swaped
      and the signedness of the comparison into account }
    function getresflags(unsigned : boolean) : tresflags;

    { brings left.location into a register; when right already is a
      register and noswap is false the two locations are exchanged instead }
    procedure left_must_be_reg(opsize : TCGSize; noswap : boolean);

    { loads whichever operands are not yet LOC_FPUREGISTER onto the
      fpu stack and keeps nf_swaped up to date }
    procedure left_and_right_must_be_fpureg;

    { emits "op right,left" for a register, reference or constant right
      operand; left must already be a register }
    procedure emit_op_right_left(op : TAsmOp; opsize : TCGSize);

    { emits the operation itself plus, if requested, the overflow check }
    procedure emit_generic_code(op : TAsmOp; opsize : TCGSize;
      unsigned, extra_not, mboverflow : boolean);

    { SSE variants used by second_cmpfloat and second_addfloat }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;

    { called by second_addordinal for multiplications with an unsigned operand }
    procedure second_mul; virtual; abstract;
  public
    { arithmetic / set operations }
    procedure second_addfloat; override;
    procedure second_addsmallset; override;
    procedure second_add64bit; override;
    procedure second_addordinal; override;

    { comparisons, the result ends up in LOC_FLAGS }
    procedure second_cmpfloat; override;
    procedure second_cmpsmallset; override;
    procedure second_cmp64bit; override;
    procedure second_cmpordinal; override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmxset; override;
    procedure second_opmmx; override;
{$endif SUPPORT_MMX}
  end;
  49. implementation
  50. uses
  51. globtype,globals,
  52. verbose,cutils,
  53. cpuinfo,
  54. aasmbase,aasmtai,aasmcpu,
  55. symconst,symdef,
  56. cgobj,cgx86,cga,cgutils,
  57. paramgr,tgobj,ncgutil,
  58. ncon,nset,
  59. defutil;
  60. {*****************************************************************************
  61. Helpers
  62. *****************************************************************************}
  { Emits the instruction(s) for "left := left op right" and, on request,
    the overflow check that follows it.

    left.location is expected to be a LOC_REGISTER on entry.

      op         - opcode to emit
      opsize     - size of the operands
      unsigned   - selects which flag the overflow check tests
                   (F_AE when true, F_NO when false)
      extra_not  - one operand is complemented with NOT first: right in
                   the normal case, left for a swapped subtraction
      mboverflow - the operation may overflow; the check itself is only
                   emitted when cs_check_overflow is active }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount     : longint;
      overflow_ok    : tasmlabel;
      scratch        : Tregister;
      reversed_sub,
      right_is_const,
      no_ovf_check   : boolean;
    begin
      { a subtraction whose operands were exchanged earlier needs the
        operands the other way round }
      reversed_sub:=(nf_swaped in flags) and (nodetype=subn);

      if right.location.loc=LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              { compute into the right register, then exchange the
                locations again and update the swap flag accordingly }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before emitting }
              case op of
                A_ADD,A_OR,A_AND,A_XOR,A_IMUL :
                  location_swap(left.location,right.location);
              end;
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if reversed_sub then
        begin
          { right is not a register: load it into a scratch register,
            operate there and move the result into the left register }
          if extra_not then
            cg.a_op_reg_reg(exprasmlist,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(exprasmlist,opsize);
          cg.a_load_loc_reg(exprasmlist,opsize,right.location,scratch);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
          cg.a_load_reg_reg(exprasmlist,opsize,opsize,scratch,left.location.register);
        end
      else
        begin
          right_is_const:=right.location.loc=LOC_CONSTANT;
          no_ovf_check:=not(cs_check_overflow in aktlocalswitches);

          { shorter forms for some constant right operands }
          if right_is_const and
             (op=A_CMP) and
             (right.location.value=0) and
             (nodetype in [equaln,unequaln]) then
            { (in)equality test against 0 -> test reg,reg }
            emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register)
          else if right_is_const and
                  no_ovf_check and
                  (op=A_ADD) and
                  (right.location.value=1) then
            { +1 -> inc }
            emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
          else if right_is_const and
                  no_ovf_check and
                  (op=A_SUB) and
                  (right.location.value=1) then
            { -1 -> dec }
            emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
          else if right_is_const and
                  no_ovf_check and
                  (op=A_IMUL) and
                  ispowerof2(int64(right.location.value),shiftcount) then
            { multiplication by a power of two -> shl }
            emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shiftcount,left.location.register)
          else if extra_not then
            begin
              { complement a copy of right and AND it into left }
              scratch:=cg.getintregister(exprasmlist,opsize);
              cg.a_load_loc_reg(exprasmlist,opsize,right.location,scratch);
              emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
              emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
            end
          else
            emit_op_right_left(op,opsize);
        end;

      { The overflow check has to be emitted right here because only this
        routine knows, through "unsigned", which flag must be tested. }
      if mboverflow and (cs_check_overflow in aktlocalswitches) then
        begin
          objectlibrary.getjumplabel(overflow_ok);
          if unsigned then
            cg.a_jmp_flags(exprasmlist,F_AE,overflow_ok)
          else
            cg.a_jmp_flags(exprasmlist,F_NO,overflow_ok);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,overflow_ok);
        end;
    end;
  { Makes sure left.location is a LOC_REGISTER.

      opsize - size used when left has to be loaded into a register
      noswap - when true the operand locations are never exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    begin
      { nothing to do when left already lives in a register }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          { right is a register: exchanging the locations is enough }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end
      else
        { A comparison does not modify its operand, so a constant register
          may be reused in that case (last argument). }
        location_force_reg(exprasmlist,left.location,opsize,
          nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
    end;
  { Makes sure both operands are on the fpu stack.

    An operand that is not a LOC_FPUREGISTER is loaded with
    a_loadfpu_loc_reg; unless it is a LOC_CFPUREGISTER its temp is released
    afterwards. nf_swaped is toggled whenever the operands end up in
    reversed order on the stack. }
  procedure tx86addnode.left_and_right_must_be_fpureg;
    begin
      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { release the temp of the operand that was just loaded, i.e. of
            right. The previous code released left's temp here, which left
            right's temp allocated and released left's temp twice. }
          if (right.location.loc<>LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,right.location);

          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
              if (left.location.loc<>LOC_CFPUREGISTER) then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
        end
      { the nominator in st0 }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
          if (left.location.loc<>LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;
    end;
  { Emits "op right,left" where left.location is a register and
    right.location is a register, a memory reference or a constant.
    Any other right location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      immreg : tregister;
{$endif x86_64}
    begin
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only accepts signed 32 bit immediates; a 64 bit
              constant outside that range goes through a register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and
                   (right.location.value<=high(longint))) then
              begin
                immreg:=cg.getintregister(exprasmlist,opsize);
                cg.a_load_const_reg(exprasmlist,opsize,right.location.value,immreg);
                exprasmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],immreg,left.location.register));
              end
            else
{$endif x86_64}
              exprasmlist.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(exprasmlist,right.location.reference);
            exprasmlist.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          exprasmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flags that correspond to the comparison in
    nodetype.

      unsigned - true selects the unsigned conditions (F_B, F_BE, F_A,
                 F_AE), false the signed ones (F_L, F_LE, F_G, F_GE)

    When nf_swaped is set the mirrored condition is returned. For node types
    other than the six comparison operators no result is assigned. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { exchanged operands turn "<" into ">" and "<=" into ">=" (and back);
        equality tests are symmetric }
      cmp:=nodetype;
      if nf_swaped in flags then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;

      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          begin
            if unsigned then
              getresflags:=F_B
            else
              getresflags:=F_L;
          end;
        lten :
          begin
            if unsigned then
              getresflags:=F_BE
            else
              getresflags:=F_LE;
          end;
        gtn :
          begin
            if unsigned then
              getresflags:=F_A
            else
              getresflags:=F_G;
          end;
        gten :
          begin
            if unsigned then
              getresflags:=F_AE
            else
              getresflags:=F_GE;
          end;
      end;
    end;
  303. {*****************************************************************************
  304. AddSmallSet
  305. *****************************************************************************}
  { Code generation for operations on small sets, handled as OS_32 values:
    union / element inclusion (addn), symmetric difference, intersection,
    difference and the bitwise xor/or/and node types. Any other node type
    raises internalerror 2003042215. The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setsize        : TCGSize;
      setop          : TAsmOp;
      invert_operand,
      keep_order     : boolean;
    begin
      pass_left_right;

      setsize:=OS_32;
      keep_order:=false;
      invert_operand:=false;

      case nodetype of
        addn :
          begin
            { adding an element is not commutative, so undo an earlier
              swap that moved the element to the left }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;

            if right.nodetype<>setelementn then
              setop:=A_OR
            else
              begin
                { ranges are not supported for small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts wants both operands in registers }
                location_force_reg(exprasmlist,left.location,setsize,false);
                location_force_reg(exprasmlist,right.location,setsize,true);
                setop:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and not(b); a constant subtrahend is
              complemented right here, otherwise emit_generic_code has to
              emit the NOT }
            setop:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
        symdifn,
        xorn :
          setop:=A_XOR;
        muln,
        andn :
          setop:=A_AND;
        orn :
          setop:=A_OR;
        else
          internalerror(2003042215);
      end;

      { the destination operand has to be a register }
      left_must_be_reg(setsize,keep_order);
      emit_generic_code(setop,setsize,true,invert_operand,false);
      location_freetemp(exprasmlist,right.location);

      set_result_location_reg;
    end;
  { Code generation for comparisons of small sets (handled as OS_32).
    Supports equaln, unequaln, lten and gten; every other node type raises
    internalerror 2003042215. The result is returned in LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      setsize : TCGSize;
    begin
      pass_left_right;
      setsize:=OS_32;

      case nodetype of
        equaln,
        unequaln :
          { plain compare, emitted below }
          ;
        lten,
        gten :
          begin
            { exchange the operands for an unswapped "<=" and for a
              swapped ">=" }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,setsize,true);
            emit_op_right_left(A_AND,setsize);
            { warning: ugly hack, a JE is needed afterwards, so the node is
              turned into an equality test }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { the first compare operand has to be a register }
      left_must_be_reg(setsize,false);
      emit_generic_code(A_CMP,setsize,true,false,false);
      location_freetemp(exprasmlist,right.location);
      location_freetemp(exprasmlist,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  406. {*****************************************************************************
  407. AddMMX
  408. *****************************************************************************}
  409. {$ifdef SUPPORT_MMX}
  { Code generation for MMX vector operations (addn, muln, subn, xorn,
    orn, andn). Any other node type raises internalerror 2003042214.
    The result is returned in an MMX register.

    Fixes compared to the previous version:
      - saturating add/sub on signed 16 bit elements used the byte opcodes
        (PADDSB/PSUBSB); they now use the word opcodes PADDSW/PSUBSW
      - the swapped subtraction path validated left.location although it
        reads right.location.reference; left is an MMX register at that
        point, so the check could never succeed }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      cmpop     : boolean;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      mmxbase:=mmx_type(left.resulttype.def);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resulttype.def));

      { NOTE(review): the inner case statements have no else branch, so op
        stays unassigned for element types they do not list - confirm that
        such combinations are rejected before code generation }
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(exprasmlist);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(exprasmlist);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: copy right into a fresh register and
                apply the operation there }
              hreg:=tcgx86(cg).getmmxregister(exprasmlist);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is read through its reference below, so it is
                    right that has to be a memory location }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(exprasmlist,right.location);
      { cmpop is never set in this routine, so left's temp is not released here }
      if cmpop then
        location_freetemp(exprasmlist,left.location);
    end;
  575. {$endif SUPPORT_MMX}
  576. {*****************************************************************************
  577. addmmxset
  578. *****************************************************************************}
  579. {$ifdef SUPPORT_MMX}
  { Set operations on MMX operands - unfinished: an opcode is selected but
    the call that would emit it (emit_generic_code) is disabled, as is the
    BTS based handling for adding a single set element. What is actually
    generated: the operands are evaluated, for lten/gten an AND is emitted,
    left is forced into a register and the temps are released. Unsupported
    node types raise internalerror 2003042215. }
  procedure tx86addnode.second_opmmxset;
    var
      setsize       : TCGSize;
      mmxop         : TAsmOp;
      is_comparison,
      keep_order    : boolean;
    begin
      pass_left_right;

      is_comparison:=false;
      keep_order:=false;
      setsize:=OS_32;

      case nodetype of
        addn :
          begin
            { adding a single element (right.nodetype=setelementn) is not
              implemented: no opcode is selected for it. The disabled code
              used bts with both operands in registers and forbade
              swapping because adding elements is not commutative. }
            if right.nodetype<>setelementn then
              mmxop:=A_POR;
          end;
        orn :
          mmxop:=A_POR;
        symdifn,
        xorn :
          mmxop:=A_PXOR;
        muln,
        andn :
          mmxop:=A_PAND;
        subn :
          mmxop:=A_PANDN;
        equaln,
        unequaln :
          begin
            mmxop:=A_PCMPEQD;
            is_comparison:=true;
          end;
        lten,
        gten :
          begin
            { exchange the operands for an unswapped "<=" and for a
              swapped ">=" }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,setsize,true);
            emit_op_right_left(A_AND,setsize);
            mmxop:=A_PCMPEQD;
            is_comparison:=true;
            { warning: ugly hack, a JE is needed afterwards, so the node is
              turned into an equality test }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      { disabled: emit_generic_code(op,opsize,true,extra_not,false); }
      location_freetemp(exprasmlist,right.location);
      if is_comparison then
        location_freetemp(exprasmlist,left.location);
    end;
  648. {$endif SUPPORT_MMX}
  649. {*****************************************************************************
  650. AddFloat
  651. *****************************************************************************}
  { Floating point add, subtract, multiply and divide through
    a_opmm_loc_reg with mms_movescalar. The result is returned in a
    LOC_MMREGISTER. Node types other than addn, muln, subn and slashn raise
    internalerror 200312231. }
  procedure tx86addnode.second_addfloatsse;
    var
      mmop        : topcg;
      commutative : boolean;
    begin
      pass_left_right;
      { restore the original operand order first }
      if nf_swaped in flags then
        swapleftright;

      commutative:=nodetype in [addn,muln];
      case nodetype of
        addn :
          mmop:=OP_ADD;
        subn :
          mmop:=OP_SUB;
        muln :
          mmop:=OP_MUL;
        slashn :
          mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      if commutative and (right.location.loc=LOC_MMREGISTER) then
        begin
          { right may only serve as destination because the operation is
            commutative }
          location.register:=right.location.register;
          { An operand sitting in an fpu register is written to memory
            rather than moved into an mm register: there is no direct
            fpu->mm register copy instruction, so this probably gives
            shorter code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,left.location);
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          { regular case: left becomes the destination register }
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          { same reasoning as above for an operand in an fpu register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,right.location);
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Floating point comparison with COMISS (single) or COMISD (double).
    Other operand types raise internalerror 200402222. The result is
    returned in LOC_FLAGS using the unsigned conditions of getresflags. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resulttype.def) then
        cmpinstr:=A_COMISS
      else if is_double(left.resulttype.def) then
        cmpinstr:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resulttype.def));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left in an mm register, right is the source }
          location_force_mmregscalar(exprasmlist,left.location,false);
          { An operand sitting in an fpu register is written to memory
            rather than moved into an mm register: there is no direct
            fpu->mm register copy instruction, so this probably gives
            shorter code. }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,right.location);
          case right.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              exprasmlist.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(exprasmlist,right.location.reference);
                exprasmlist.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right already is an mm register: compare with the operands
            exchanged and record that in nf_swaped }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,left.location);
          case left.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              exprasmlist.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(exprasmlist,left.location.reference);
                exprasmlist.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swaped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Floating point add, subtract, multiply and divide. Delegates to
    second_addfloatsse when use_sse says so for the result type; otherwise
    the popping fpu instructions are used and the result is returned in
    NR_ST. Unsupported node types raise internalerror 2003042214. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn :
          fpuop:=A_FADDP;
        subn :
          fpuop:=A_FSUBP;
        muln :
          fpuop:=A_FMULP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { with exchanged operands the non commutative operations need
        their reverse form }
      if nf_swaped in flags then
        case nodetype of
          subn :
            fpuop:=A_FSUBRP;
          slashn :
            fpuop:=A_FDIVRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      location.register:=NR_ST;
    end;
  { Floating point comparison. Delegates to second_cmpfloatsse when use_sse
    says so for either operand type; otherwise both operands are brought
    onto the fpu stack and compared there. The result is returned in
    LOC_FLAGS.

    The condition is taken from getresflags(true): its unsigned table is
    the very mapping this routine used to spell out four times (swapped and
    unswapped, once per code path), and second_cmpfloatsse relies on the
    same helper. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_sse(left.resulttype.def) or use_sse(right.resulttype.def) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

{$ifndef x86_64}
      if aktcputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getcpuregister(exprasmlist,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(exprasmlist,NR_AX);
        end
      else
{$endif x86_64}
        begin
          exprasmlist.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          exprasmlist.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      { both code paths are evaluated with the unsigned conditions;
        getresflags also honours nf_swaped }
      location.resflags:=getresflags(true);
    end;
  877. {*****************************************************************************
  878. Add64bit
  879. *****************************************************************************}
  { 64 bit arithmetic: handled like any other ordinal when cpu64bit is
    defined, otherwise internalerror 200402042 is raised. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bit}
      { has to be implemented separately }
      internalerror(200402042);
{$else cpu64bit}
      second_addordinal;
{$endif cpu64bit}
    end;
  { 64 bit comparison: handled like any other ordinal when cpu64bit is
    defined, otherwise internalerror 200402043 is raised. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bit}
      { has to be implemented separately }
      internalerror(200402043);
{$else cpu64bit}
      second_cmpordinal;
{$endif cpu64bit}
    end;
  898. {*****************************************************************************
  899. AddOrdinal
  900. *****************************************************************************}
  { Ordinal arithmetic. A multiplication with at least one unsigned operand
    needs the unsigned MUL opcode and is handed to second_mul; everything
    else is left to the inherited implementation. }
  procedure tx86addnode.second_addordinal;
    begin
      if (nodetype=muln) and
         not(is_signed(left.resulttype.def) and
             is_signed(right.resulttype.def)) then
        second_mul
      else
        inherited second_addordinal;
    end;
  { Ordinal comparison: emits a CMP sized after the left operand and returns
    the result in LOC_FLAGS. The unsigned conditions are used as soon as one
    of the operands is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize       : tcgsize;
      is_unsigned   : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def) and
                       is_signed(right.resulttype.def));
      cmpsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are not needed any more once the flags are set }
      location_freetemp(exprasmlist,right.location);
      location_freetemp(exprasmlist,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: store tx86addnode in caddnode }
  begin
    caddnode := tx86addnode;
  end.