nx86add.pas 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  type
    { Add-node code generator shared by the i386 and x86_64 back ends.
      It extends the generic tcgaddnode with x86 specific helpers and
      overrides the second-pass entry points for sets, floats, ordinals,
      64 bit values and (optionally) MMX operands. }
    tx86addnode = class(tcgaddnode)
    protected
      { maps the comparison held in nodetype to the CPU flags to test,
        taking the nf_swaped flag and the signedness into account }
      function getresflags(unsigned : boolean) : tresflags;
      { makes sure left.location is a LOC_REGISTER, by swapping the operand
        locations (unless noswap is set) or by loading left into a register }
      procedure left_must_be_reg(opsize : TCGSize; noswap : boolean);
      { forces both operands into LOC_FPUREGISTER and records the resulting
        operand order in nf_swaped }
      procedure left_and_right_must_be_fpureg;
      { emits "op right,left"; left must already be a register }
      procedure emit_op_right_left(op : TAsmOp; opsize : TCgSize);
      { emits the integer operation itself, a few constant special cases and
        the optional overflow check }
      procedure emit_generic_code(op : TAsmOp; opsize : TCgSize;
                                  unsigned : boolean;
                                  extra_not : boolean;
                                  mboverflow : boolean);
      { SSE variants of the float comparison and float arithmetic }
      procedure second_cmpfloatsse;
      procedure second_addfloatsse;
      { unsigned multiplication; has to be supplied by a descendant }
      procedure second_mul; virtual; abstract;
    public
      procedure second_addfloat; override;
      procedure second_addsmallset; override;
      procedure second_add64bit; override;
      procedure second_addordinal; override;
      procedure second_cmpfloat; override;
      procedure second_cmpsmallset; override;
      procedure second_cmp64bit; override;
      procedure second_cmpordinal; override;
  {$ifdef SUPPORT_MMX}
      procedure second_opmmxset; override;
      procedure second_opmmx; override;
  {$endif SUPPORT_MMX}
    end;
  49. implementation
  50. uses
  51. globtype,globals,
  52. verbose,cutils,
  53. cpuinfo,
  54. aasmbase,aasmtai,aasmdata,aasmcpu,
  55. symconst,symdef,
  56. cgobj,cgx86,cga,cgutils,
  57. paramgr,tgobj,ncgutil,
  58. ncon,nset,
  59. defutil;
  60. {*****************************************************************************
  61. Helpers
  62. *****************************************************************************}
  { Emits the instruction(s) for the integer operation "op" on left and right.

    op          : x86 opcode to emit
    opsize      : operand size
    unsigned    : selects the carry (unsigned) or overflow (signed) flag for
                  the overflow check
    extra_not   : the operand that is and-ed in has to be inverted first
                  (used for set difference)
    mboverflow  : the operation may overflow, so an overflow check is emitted
                  when cs_check_overflow is active

    On entry left.location is expected to be a LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount   : longint;
      no_overflow  : tasmlabel;
      scratch      : Tregister;
      asmsize      : topsize;
      ok_flag      : tresflags;
      swapped_sub,
      rhs_in_reg,
      rhs_is_const,
      check_ovf    : boolean;
    begin
      asmsize:=TCGSize2Opsize[opsize];
      swapped_sub:=(nodetype=subn) and (nf_swaped in flags);
      rhs_in_reg:=(right.location.loc=LOC_REGISTER);
      rhs_is_const:=(right.location.loc=LOC_CONSTANT);
      check_ovf:=(cs_check_overflow in aktlocalswitches);

      if swapped_sub then
        begin
          { subtraction with exchanged operands: the result has to end up
            in the other register }
          if rhs_in_reg then
            begin
              if extra_not then
                emit_reg(A_NOT,asmsize,left.location.register);
              emit_reg_reg(op,asmsize,left.location.register,right.location.register);
              { the locations are exchanged again, so the flag is toggled too }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute in a scratch register, then move the result to left }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
              emit_reg_reg(op,asmsize,left.location.register,scratch);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
            end;
        end
      else if rhs_in_reg then
        begin
          if extra_not then
            emit_reg(A_NOT,asmsize,right.location.register);
          { for these opcodes the operand locations are exchanged before
            the instruction is emitted }
          case op of
            A_ADD,A_OR,A_AND,A_XOR,A_IMUL :
              location_swap(left.location,right.location);
          end;
          emit_reg_reg(op,asmsize,right.location.register,left.location.register);
        end
      { special cases for a constant right operand }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              rhs_is_const and
              (right.location.value=0) then
        { (in)equality test against zero: test reg,reg }
        emit_reg_reg(A_TEST,asmsize,left.location.register,left.location.register)
      else if (op=A_ADD) and
              rhs_is_const and
              (right.location.value=1) and
              not check_ovf then
        emit_reg(A_INC,asmsize,left.location.register)
      else if (op=A_SUB) and
              rhs_is_const and
              (right.location.value=1) and
              not check_ovf then
        emit_reg(A_DEC,asmsize,left.location.register)
      else if (op=A_IMUL) and
              rhs_is_const and
              ispowerof2(int64(right.location.value),shiftcount) and
              not check_ovf then
        { multiplication by a power of two becomes a shift }
        emit_const_reg(A_SHL,asmsize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { load right, invert it and and it into left }
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
          emit_reg(A_NOT,asmsize,scratch);
          emit_reg_reg(A_AND,asmsize,scratch,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check has to be generated here, because only this
        routine knows (through "unsigned") which flag signals the overflow. }
      if mboverflow and check_ovf then
        begin
          current_asmdata.getjumplabel(no_overflow);
          if unsigned then
            ok_flag:=F_AE
          else
            ok_flag:=F_NO;
          cg.a_jmp_flags(current_asmdata.CurrAsmList,ok_flag,no_overflow);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
          cg.a_label(current_asmdata.CurrAsmList,no_overflow);
        end;
    end;
  { Guarantees that left.location is a LOC_REGISTER afterwards.

    opsize : size used when left has to be loaded into a register
    noswap : when true the operand locations are never exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    begin
      { nothing to do when left already lives in a register }
      if left.location.loc=LOC_REGISTER then
        exit;

      if noswap or (right.location.loc<>LOC_REGISTER) then
        { Load left into a register. For comparisons the last argument is
          true: the value is not modified, so a constant register may
          possibly be reused. }
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,
          (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]))
      else
        begin
          { right is a register already: simply exchange the locations }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end;
    end;
  { Forces both operands into LOC_FPUREGISTER.

    Right is loaded first when it is not in an fpu register yet. If left
    was already in an fpu register, nf_swaped is toggled instead of
    loading it (the original comments note that the operands are then in
    the wrong order on the fpu stack). }
  procedure tx86addnode.left_and_right_must_be_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was already on the stack => record the swap }
        toggleflag(nf_swaped)
      else
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
    end;
  { Emits "op right,left" for a right operand that is a register, a memory
    reference or a constant. left.location must be a register.
    Any other right location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      instr : taicpu;
  {$ifdef x86_64}
      tmpreg : tregister;
  {$endif x86_64}
    begin
      instr:=nil;
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          instr:=taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            instr:=taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register);
          end;
        LOC_CONSTANT :
          begin
  {$ifdef x86_64}
            { x86_64 only supports signed 32 bit constants directly, so a
              larger 64 bit constant goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                instr:=taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register);
              end
            else
  {$endif x86_64}
              instr:=taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register);
          end;
        else
          internalerror(200203232);
      end;
      { append the instruction that was built above }
      if assigned(instr) then
        current_asmdata.CurrAsmList.concat(instr);
    end;
  { Returns the CPU flags that represent the comparison stored in nodetype.

    unsigned : true selects the above/below flags, false the
               greater/less flags

    When nf_swaped is set the ordering comparisons are mirrored
    (< becomes >, <= becomes >=, and vice versa). The result is only
    assigned for the six comparison node types. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { normalise: express a swapped comparison as its mirrored counterpart }
      cmp:=nodetype;
      if nf_swaped in flags then
        begin
          case cmp of
            ltn  : cmp:=gtn;
            lten : cmp:=gten;
            gtn  : cmp:=ltn;
            gten : cmp:=lten;
          end;
        end;

      if unsigned then
        begin
          case cmp of
            equaln   : getresflags:=F_E;
            unequaln : getresflags:=F_NE;
            ltn      : getresflags:=F_B;
            lten     : getresflags:=F_BE;
            gtn      : getresflags:=F_A;
            gten     : getresflags:=F_AE;
          end;
        end
      else
        begin
          case cmp of
            equaln   : getresflags:=F_E;
            unequaln : getresflags:=F_NE;
            ltn      : getresflags:=F_L;
            lten     : getresflags:=F_LE;
            gtn      : getresflags:=F_G;
            gten     : getresflags:=F_GE;
          end;
        end;
    end;
  291. {*****************************************************************************
  292. AddSmallSet
  293. *****************************************************************************}
  { Generates code for operations on small sets, which are handled as
    32 bit values (OS_32): union, symmetric difference, intersection,
    difference and the plain bitwise or/xor/and node types.
    Adding a single set element is done with BTS. }
  procedure tx86addnode.second_addsmallset;
    var
      setsize     : TCGSize;
      setop       : TAsmOp;
      invert_rhs,
      keep_order  : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      invert_rhs:=false;
      setsize:=OS_32;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative, so an element that ended
              up on the left side is moved back to the right
              (the original author calls this "a really ugly hack") }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            setop:=A_OR;
            { are we adding a set element ? }
            if right.nodetype=setelementn then
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both operands to be registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,setsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,setsize,true);
                setop:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and (not b); a constant subtrahend is
              inverted right here, anything else needs a NOT instruction }
            setop:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  invert_rhs:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  invert_rhs:=true;
              end;
          end;
        symdifn,
        xorn :
          setop:=A_XOR;
        muln,
        andn :
          setop:=A_AND;
        orn :
          setop:=A_OR;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      emit_generic_code(setop,setsize,true,invert_rhs,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);

      set_result_location_reg;
    end;
  { Generates code for comparing two small sets (handled as OS_32).
    Equality and inequality are a plain CMP. The subset tests <= and >=
    first and the two sets together and then compare the result for
    equality. The outcome is returned in the CPU flags. }
  procedure tx86addnode.second_cmpsmallset;
    var
      setsize  : TCGSize;
      cmpinstr : TAsmOp;
    begin
      pass_left_right;
      setsize:=OS_32;

      case nodetype of
        equaln,
        unequaln :
          cmpinstr:=A_CMP;
        lten,
        gten :
          begin
            { only lten and gten reach this branch, so this is the same as
              "(not swapped and lten) or (swapped and gten)" }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,setsize,true);
            emit_op_right_left(A_AND,setsize);
            cmpinstr:=A_CMP;
            { warning: ugly hack, a JE is needed, so the node is turned
              into an equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,false);
      emit_generic_code(cmpinstr,setsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  394. {*****************************************************************************
  395. AddMMX
  396. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for an arithmetic or logical operation on MMX operands.

    The opcode is selected from nodetype, the MMX element type of the left
    operand and, for add/sub, the cs_mmx_saturation switch. The result is
    returned in an MMX register (LOC_MMXREGISTER).

    Fixes compared to the previous version:
      - signed 16 bit saturating add/sub used the byte opcodes
        (PADDSB/PSUBSB); they now use the word opcodes PADDSW/PSUBSW
      - in the swapped subtraction path the sanity check tested
        left.location, which is always LOC_MMXREGISTER at that point, so it
        always raised internalerror 200203247; it now tests right.location,
        which is the operand that is actually loaded
      - the always-false cmpop local and its dead branch were removed

    NOTE(review): op stays unassigned for element types that have no entry
    in the inner case statements (e.g. 32 bit elements with saturation);
    presumably such nodes never reach this routine - confirm. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resulttype.def);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resulttype.def));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    { signed saturation on words, not on bytes }
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { signed saturation on words, not on bytes }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { if left is not in an MMX register, one has to be provided }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { right already is a register: exchange the locations }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc is LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: load right into a new register and
                subtract left from it, the new register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is read through its reference below, so right
                    (not left) has to be a memory location here }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in right: exchange the locations again }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  {$endif SUPPORT_MMX}
  564. {*****************************************************************************
  565. addmmxset
  566. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Set operations on MMX operands.

    Only the opcode selection and the operand preparation are present;
    the call that would emit the operation itself (emit_generic_code) is
    disabled in this routine, so the selected opcode is currently not
    used. Adding a single set element (right is a setelementn) selects no
    opcode at all; the BTS based code for that case is disabled as well. }
  procedure tx86addnode.second_opmmxset;
    var
      setsize    : TCGSize;
      mmxop      : TAsmOp;
      is_compare,
      keep_order : boolean;
    begin
      pass_left_right;

      is_compare:=false;
      keep_order:=false;
      setsize:=OS_32;

      case nodetype of
        addn :
          begin
            { plain union; nothing is selected when a set element is added }
            if right.nodetype<>setelementn then
              mmxop:=A_POR;
          end;
        symdifn,
        xorn :
          mmxop:=A_PXOR;
        muln,
        andn :
          mmxop:=A_PAND;
        subn :
          mmxop:=A_PANDN;
        orn :
          mmxop:=A_POR;
        equaln,
        unequaln :
          begin
            mmxop:=A_PCMPEQD;
            is_compare:=true;
          end;
        lten,
        gten :
          begin
            { only lten and gten reach this branch, so this is the same as
              "(not swapped and lten) or (swapped and gten)" }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,setsize,true);
            emit_op_right_left(A_AND,setsize);
            mmxop:=A_PCMPEQD;
            is_compare:=true;
            { warning: ugly hack, a JE is needed, so the node is turned
              into an equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      { disabled: emit_generic_code(op,opsize,true,extra_not,false); }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if is_compare then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  {$endif SUPPORT_MMX}
  637. {*****************************************************************************
  638. AddFloat
  639. *****************************************************************************}
  { Generates scalar SSE code for the float operations +, *, - and /.
    The result is returned in an mm register (LOC_MMREGISTER). }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;

    { Combines "src" into location.register using op. An operand that
      lives in an fpu register is written to memory first: according to
      the original comments this probably gives shorter code because
      there is no direct fpu->mm register copy instruction. }
    procedure combine_with(var src : tlocation);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,src);
        cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,src,location.register,mms_movescalar);
      end;

    begin
      pass_left_right;
      { undo an operand swap first }
      if (nf_swaped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          { right may only take the role of the destination when the
            operation is commutative }
          location.register:=right.location.register;
          combine_with(left.location);
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          combine_with(right.location);
        end;
    end;
  { Generates an SSE float comparison with COMISS (single) or COMISD
    (double). Any other operand type raises internalerror 200402222.
    The result is returned in the CPU flags, which are read with the
    unsigned flag mapping of getresflags. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      op : tasmop;

    { Emits "op src,dst". An operand that lives in an fpu register is
      written to memory first: according to the original comments this
      probably gives shorter code because there is no direct fpu->mm
      register copy instruction. "errcode" is the internalerror number
      raised for an unsupported location of src. }
    procedure compare_with(var src : tlocation; dst : tregister; errcode : longint);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,src);
        case src.loc of
          LOC_REFERENCE,
          LOC_CREFERENCE :
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,src.reference,dst));
            end;
          LOC_MMREGISTER,
          LOC_CMMREGISTER :
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,src.register,dst));
          else
            internalerror(errcode);
        end;
      end;

    begin
      if is_single(left.resulttype.def) then
        op:=A_COMISS
      else if is_double(left.resulttype.def) then
        op:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,def_cgsize(resulttype.def));
      if (right.location.loc=LOC_MMREGISTER) then
        begin
          { right already is in an mm register, so it is used as the
            register operand and the swap flag is toggled }
          compare_with(left.location,right.location.register,200402221);
          if nf_swaped in flags then
            exclude(flags,nf_swaped)
          else
            include(flags,nf_swaped)
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          compare_with(right.location,left.location.register,200402223);
        end;
      location.resflags:=getresflags(true);
    end;
  { Generates code for the float operations +, *, - and /.
    Uses the SSE variant when use_sse accepts the result type, otherwise
    the x87 fpu: both operands are put on the fpu stack and combined with
    a popping instruction, the result is returned in ST. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resulttype.def) then
        second_addfloatsse
      else
        begin
          pass_left_right;

          case nodetype of
            addn :
              fpuop:=A_FADDP;
            subn :
              fpuop:=A_FSUBP;
            muln :
              fpuop:=A_FMULP;
            slashn :
              fpuop:=A_FDIVP;
            else
              internalerror(2003042214);
          end;

          left_and_right_must_be_fpureg;

          { with swapped operands the non commutative operations need the
            reverse form of the instruction }
          if nf_swaped in flags then
            begin
              case nodetype of
                slashn :
                  fpuop:=A_FDIVRP;
                subn :
                  fpuop:=A_FSUBRP;
              end;
            end;

          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;

          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register:=NR_ST;
        end;
    end;
  { Generates code for a float comparison; the result is returned in the
    CPU flags.

    The SSE variant is used when use_sse accepts one of the operand types.
    Otherwise both operands are put on the fpu stack and compared with
    FCOMPP + FNSTSW/SAHF on CPUs older than the Pentium 2 (not compiled in
    for x86_64), or with FCOMIP + FSTP on newer ones. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
      operands_swapped : boolean;
    begin
      if use_sse(left.resulttype.def) or use_sse(right.resulttype.def) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

  {$ifndef x86_64}
      if aktcputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
  {$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Both code paths use the same flag mapping: the above/below flags,
        mirrored when the operands were swapped. }
      operands_swapped:=nf_swaped in flags;
      case nodetype of
        equaln :
          resflags:=F_E;
        unequaln :
          resflags:=F_NE;
        ltn :
          begin
            if operands_swapped then
              resflags:=F_A
            else
              resflags:=F_B;
          end;
        lten :
          begin
            if operands_swapped then
              resflags:=F_AE
            else
              resflags:=F_BE;
          end;
        gtn :
          begin
            if operands_swapped then
              resflags:=F_B
            else
              resflags:=F_A;
          end;
        gten :
          begin
            if operands_swapped then
              resflags:=F_BE
            else
              resflags:=F_AE;
          end;
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  865. {*****************************************************************************
  866. Add64bit
  867. *****************************************************************************}
  { 64 bit arithmetic: on 64 bit CPUs this is an ordinary ordinal
    operation; otherwise it has to be implemented separately and reaching
    this routine is an internal error. }
  procedure tx86addnode.second_add64bit;
    begin
  {$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402042);
  {$else cpu64bit}
      second_addordinal;
  {$endif cpu64bit}
    end;
  { 64 bit comparison: on 64 bit CPUs this is an ordinary ordinal
    comparison; otherwise it has to be implemented separately and reaching
    this routine is an internal error. }
  procedure tx86addnode.second_cmp64bit;
    begin
  {$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402043);
  {$else cpu64bit}
      second_cmpordinal;
  {$endif cpu64bit}
    end;
  886. {*****************************************************************************
  887. AddOrdinal
  888. *****************************************************************************}
  { Ordinal arithmetic. A multiplication with at least one unsigned
    operand needs the unsigned MUL opcode, which requires special handling
    and is therefore passed to second_mul; everything else is handled by
    the inherited implementation. }
  procedure tx86addnode.second_addordinal;
    begin
      if (nodetype<>muln) or
         (is_signed(left.resulttype.def) and
          is_signed(right.resulttype.def)) then
        inherited second_addordinal
      else
        second_mul;
    end;
  { Ordinal comparison: emits a CMP (or one of the special cases of
    emit_generic_code) and returns the result in the CPU flags. The
    comparison counts as unsigned as soon as one operand is unsigned. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize     : tcgsize;
      is_unsigned : boolean;
    begin
      { both values are taken before the operands are evaluated }
      is_unsigned:=not(is_signed(left.resulttype.def) and
                       is_signed(right.resulttype.def));
      cmpsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  initialization
    { register this class as the add node class to be used }
    caddnode:=tx86addnode;
  end.