nx86add.pas 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  type
    { Add node for the x86 family; derives from the generic code generator
      add node and overrides its second_* code generation hooks. }
    tx86addnode = class(tcgaddnode)
    protected
      { maps the current comparison node type to a processor flag condition }
      function  getresflags(unsigned : boolean) : tresflags;
      { makes sure left.location is a LOC_REGISTER (swapping if allowed) }
      procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
      { brings both operands into LOC_FPUREGISTER locations }
      procedure left_and_right_must_be_fpureg;
      { emits "op right,left" for the possible locations of right }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { emits the operation itself plus the optional overflow check }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      { SSE variants of the floating point compare/arithmetic code }
      procedure second_cmpfloatsse;
      procedure second_addfloatsse;
      { unsigned multiplication; has to be supplied by a descendant }
      procedure second_mul;virtual;abstract;
    public
      procedure second_addfloat;override;
      procedure second_addsmallset;override;
      procedure second_add64bit;override;
      procedure second_addordinal;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmxset;override;
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    end;
  49. implementation
  50. uses
  51. globtype,globals,
  52. verbose,cutils,
  53. cpuinfo,
  54. aasmbase,aasmtai,aasmdata,aasmcpu,
  55. symconst,symdef,
  56. cgobj,cgx86,cga,cgutils,
  57. paramgr,tgobj,ncgutil,
  58. ncon,nset,
  59. defutil;
  60. {*****************************************************************************
  61. Helpers
  62. *****************************************************************************}
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    {
      Emits the instruction(s) for "left <op> right"; the caller must already
      have placed left.location in a LOC_REGISTER.

        op          instruction to emit
        opsize      operand size
        unsigned    selects the overflow test: carry based (F_AE) when true,
                    overflow flag based (F_NO) when false
        extra_not   the operand that is applied to the other one is inverted
                    with NOT first
        mboverflow  the operation may overflow; the check is only generated
                    when cs_check_overflow is active as well
    }
    var
      shiftcount  : longint;
      skip_ovf    : tasmlabel;
      scratch     : Tregister;
      opsz        : topsize;
      swapped_sub,
      ovf_checks  : boolean;
    begin
      opsz:=TCGSize2Opsize[opsize];
      { evaluated once up front: the register path below toggles nf_swaped }
      swapped_sub:=(nodetype=subn) and (nf_swaped in flags);
      ovf_checks:=cs_check_overflow in aktlocalswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { operands are exchanged: compute into right's register and
                make that register the new left location }
              if extra_not then
                emit_reg(A_NOT,opsz,left.location.register);
              emit_reg_reg(op,opsz,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              { exchanged again, so the swapped flag flips as well }
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,opsz,right.location.register);
              { for these opcodes the locations are exchanged before the
                instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,opsz,right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate on that and copy the result back into left's register }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
          emit_reg_reg(op,opsz,left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      { shortcuts for constant right operands }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=0) then
        { (in)equality test against zero }
        emit_reg_reg(A_TEST,opsz,left.location.register,left.location.register)
      else if (op=A_ADD) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not(ovf_checks) then
        emit_reg(A_INC,opsz,left.location.register)
      else if (op=A_SUB) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not(ovf_checks) then
        emit_reg(A_DEC,opsz,left.location.register)
      else if (op=A_IMUL) and
              (right.location.loc=LOC_CONSTANT) and
              (ispowerof2(int64(right.location.value),shiftcount)) and
              not(ovf_checks) then
        { multiplication by a power of two becomes a shift }
        emit_const_reg(A_SHL,opsz,shiftcount,left.location.register)
      else if extra_not then
        begin
          { inverted copy of right in a scratch register; this path always
            combines with AND, independent of op }
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
          emit_reg(A_NOT,opsz,scratch);
          emit_reg_reg(A_AND,opsz,scratch,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { the overflow check has to be generated here, because only this
        routine knows (through "unsigned") which flag has to be tested }
      if mboverflow and ovf_checks then
        begin
          current_asmdata.getjumplabel(skip_ovf);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,skip_ovf)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,skip_ovf);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
          cg.a_label(current_asmdata.CurrAsmList,skip_ovf);
        end;
    end;
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    {
      Guarantees that left.location is a register location afterwards.

        opsize  size used when left has to be loaded into a register
        noswap  when true, left and right are never exchanged
    }
    begin
      { nothing to do if left is a register already }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          { right already is a register: exchanging the locations is enough }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end
      else
        { a constant register may be reused for comparisons, because those
          do not change the value of the register }
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,
          (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
    end;
  procedure tx86addnode.left_and_right_must_be_fpureg;
    {
      Forces both operands into LOC_FPUREGISTER locations. Right is loaded
      first when it is not there yet, then left. If left already was an fpu
      register the nf_swaped flag is toggled instead of loading it.
    }
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);

      if left.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
      else
        { left was on the fpu stack already, so the operands are in the
          opposite order there => record the swap }
        toggleflag(nf_swaped);
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    {
      Appends "op right,left" to the current assembler list; left must be a
      register. Right may be a register, a memory reference or a constant;
      subset locations are loaded into a register first. Any other location
      of right raises internalerror 200203232.
    }
    var
      opsz   : topsize;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
    begin
      opsz:=TCGSize2Opsize[opsize];

      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resulttype.def),true);

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,opsz,right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,opsz,right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only takes signed 32 bit immediates directly; anything
              outside that range goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and
                   (right.location.value<=high(longint))) then
              begin
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,opsz,tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,opsz,right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    {
      Returns the flag condition that corresponds to the comparison held in
      nodetype. When nf_swaped is set, the ordering comparisons are mirrored
      (< becomes >, <= becomes >=, and vice versa). "unsigned" selects the
      above/below conditions instead of greater/less.

      For node types other than the six comparisons no result is assigned.
    }
    var
      cmp : tnodetype;
    begin
      { (in)equality depends neither on the signedness nor on the order }
      if nodetype=equaln then
        begin
          getresflags:=F_E;
          exit;
        end;
      if nodetype=unequaln then
        begin
          getresflags:=F_NE;
          exit;
        end;

      { exchanged operands: evaluate the mirrored comparison }
      cmp:=nodetype;
      if nf_swaped in flags then
        case nodetype of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;

      if unsigned then
        case cmp of
          ltn  : getresflags:=F_B;
          lten : getresflags:=F_BE;
          gtn  : getresflags:=F_A;
          gten : getresflags:=F_AE;
        end
      else
        case cmp of
          ltn  : getresflags:=F_L;
          lten : getresflags:=F_LE;
          gtn  : getresflags:=F_G;
          gten : getresflags:=F_GE;
        end;
    end;
  293. {*****************************************************************************
  294. AddSmallSet
  295. *****************************************************************************}
  procedure tx86addnode.second_addsmallset;
    {
      Code generation for operations on small sets, handled as 32 bit values:
      union/element inclusion (addn), symmetric difference, intersection,
      difference and the xor/or/and node types. The result ends up in a
      register location.
    }
    var
      setsize   : TCGSize;
      setop     : TAsmOp;
      keeporder,
      negate    : boolean;
    begin
      pass_left_right;

      setsize:=OS_32;
      keeporder:=false;
      negate:=false;

      case nodetype of
        addn :
          begin
            { adding an element is not commutative: undo a swap that left
              the set element on the left side }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              setop:=A_OR
            else
              begin
                { element ranges are not supported for small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts needs both operands in registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,setsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,setsize,true);
                setop:=A_BTS;
                keeporder:=true;
              end;
          end;
        symdifn,
        xorn :
          setop:=A_XOR;
        muln,
        andn :
          setop:=A_AND;
        orn :
          setop:=A_OR;
        subn :
          begin
            { difference = AND with the complement of the subtrahend; a
              constant subtrahend is complemented right here, otherwise
              emit_generic_code has to generate the NOT }
            setop:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  negate:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  negate:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      left_must_be_reg(setsize,keeporder);
      emit_generic_code(setop,setsize,true,negate,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);

      set_result_location_reg;
    end;
  procedure tx86addnode.second_cmpsmallset;
    {
      Code generation for comparisons of small sets (32 bit values). The
      result is returned as a LOC_FLAGS location.

      equaln/unequaln are a plain CMP. The subset tests lten/gten are done
      as "(a AND b) = a": the operands are arranged accordingly, left is
      ANDed with right and the outcome is compared for equality.
    }
    var
      opsize : TCGSize;
      op     : TAsmOp;
    begin
      pass_left_right;

      opsize:=OS_32;
      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,gten:
          begin
            if (not(nf_swaped in flags) and (nodetype = lten)) or
               ((nf_swaped in flags) and (nodetype = gten)) then
              swapleftright;
            { The AND emitted below overwrites left's register, so a
              constant register (register variable) must not be reused
              here: reuse is only valid for operations that leave the
              register unchanged. Hence maybeconst=false. }
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(op,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  396. {*****************************************************************************
  397. AddMMX
  398. *****************************************************************************}
  399. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    {
      Code generation for MMX vector operations (add, multiply, subtract,
      xor, or, and). The opcode is selected from the node type, the MMX
      element type of the left operand and, for add/subtract, the
      cs_mmx_saturation switch. The result is a LOC_MMXREGISTER location.

      Changes compared to the previous version:
        - saturating subtraction of signed 16 bit elements used the byte
          opcode PSUBSB; it now uses PSUBSW
        - the sanity check in the swapped subtraction path tested
          left.location although right.location is the operand that gets
          loaded from memory there; left is always an MMX register at that
          point, so the old check raised an internal error unconditionally
        - the always-false local "cmpop" has been removed
    }
    var
      op        : TAsmOp;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resulttype.def);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resulttype.def));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word variant }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { neither left nor right in an MMX register? then one has to be loaded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { operands are exchanged: copy right into a fresh register
                and subtract left from that copy }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is read through its reference below, so it is
                    right (not left) that has to be a memory location }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  565. {$endif SUPPORT_MMX}
  566. {*****************************************************************************
  567. addmmxset
  568. *****************************************************************************}
  569. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmxset;
    {
      Set operations using MMX opcodes.

      NOTE(review): in its present form this routine only selects an opcode
      and prepares the operand locations; the call that would emit the
      selected opcode is disabled (see the end of the routine) and the
      result location is not set here.
    }
    var
      setsize   : TCGSize;
      mmxop     : TAsmOp;
      iscompare,
      keeporder : boolean;
    begin
      pass_left_right;

      iscompare:=false;
      keeporder:=false;
      setsize:=OS_32;

      case nodetype of
        addn :
          begin
            if right.nodetype<>setelementn then
              mmxop:=A_POR;
            { adding a single set element is disabled; the former code was:
                if nf_swaped in flags then
                  swapleftright;
                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize_2_cgsize[opsize],false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize_2_cgsize[opsize],true);
                op:=A_BTS;
                noswap:=true; }
          end;
        orn :
          mmxop:=A_POR;
        symdifn,
        xorn :
          mmxop:=A_PXOR;
        muln,
        andn :
          mmxop:=A_PAND;
        subn :
          mmxop:=A_PANDN;
        equaln,
        unequaln :
          begin
            mmxop:=A_PCMPEQD;
            iscompare:=true;
          end;
        lten,
        gten :
          begin
            if ((nodetype=lten) and not(nf_swaped in flags)) or
               ((nodetype=gten) and (nf_swaped in flags)) then
              swapleftright;
            { NOTE(review): left may stay in a constant register here
              (last argument is true) although the AND below writes to it;
              confirm whether that is intended }
            location_force_reg(current_asmdata.CurrAsmList,left.location,setsize,true);
            emit_op_right_left(A_AND,setsize);
            mmxop:=A_PCMPEQD;
            iscompare:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      left_must_be_reg(setsize,keeporder);
      { disabled: emit_generic_code(op,opsize,true,extra_not,false); }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if iscompare then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  638. {$endif SUPPORT_MMX}
  639. {*****************************************************************************
  640. AddFloat
  641. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    {
      Floating point add/sub/mul/div using scalar SSE operations. The result
      is a LOC_MMREGISTER location. Other node types raise internalerror
      200312231.
    }
    var
      mmop : topcg;
    begin
      pass_left_right;
      if (nf_swaped in flags) then
        swapleftright;

      case nodetype of
        addn   : mmop:=OP_ADD;
        muln   : mmop:=OP_MUL;
        subn   : mmop:=OP_SUB;
        slashn : mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      if (right.location.loc=LOC_MMREGISTER) and (mmop in [OP_ADD,OP_MUL]) then
        begin
          { right's register may only serve as destination because the
            operation is commutative }
          location.register:=right.location.register;
          { an operand living in an fpu register is written to memory
            rather than moved to an mm register: there is no direct
            fpu->mm register copy, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,
            left.location,location.register,mms_movescalar);
        end
      else
        begin
          if not(nf_swaped in flags) then
            if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
              location_force_mem(current_asmdata.CurrAsmList,right.location);

          { left becomes the destination register }
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;

          { same reasoning as above: fpu register operands go via memory }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,
            right.location,location.register,mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_cmpfloatsse;
    {
      Floating point comparison with COMISS (single) or COMISD (double); any
      other left operand type raises internalerror 200402222. The result is
      a LOC_FLAGS location whose condition is taken from getresflags(true).
    }
    var
      cmpop : tasmop;
    begin
      if is_single(left.resulttype.def) then
        cmpop:=A_COMISS
      else if is_double(left.resulttype.def) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,def_cgsize(resulttype.def));

      if (right.location.loc=LOC_MMREGISTER) then
        begin
          { right already is in an mm register: compare left against it and
            record the exchanged operand order further down }

          { an operand living in an fpu register is written to memory
            rather than moved to an mm register: there is no direct
            fpu->mm register copy, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);

          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;

          { the operands were compared in exchanged order }
          toggleflag(nf_swaped);
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);

          { same reasoning as above: fpu register operands go via memory }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);

          case right.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getresflags(true);
    end;
  procedure tx86addnode.second_addfloat;
    {
      Floating point add/sub/mul/div. Delegates to second_addfloatsse when
      use_sse() accepts the result type; otherwise both operands are brought
      onto the fpu stack and combined with a popping x87 instruction. The
      result is ST as a LOC_FPUREGISTER location. Other node types raise
      internalerror 2003042214.
    }
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn   : fpuop:=A_FADDP;
        muln   : fpuop:=A_FMULP;
        subn   : fpuop:=A_FSUBP;
        slashn : fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { exchanged operands need the reversed form of the non-commutative
        operations }
      if nf_swaped in flags then
        case nodetype of
          slashn : fpuop:=A_FDIVRP;
          subn   : fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      location.register:=NR_ST;
    end;
  procedure tx86addnode.second_cmpfloat;
    {
      Floating point comparison. Delegates to second_cmpfloatsse when
      use_sse() accepts the type of either operand; otherwise both operands
      are compared on the fpu stack. The result is a LOC_FLAGS location.

      The flag condition is the one getresflags returns for unsigned
      comparisons (above/below, mirrored when nf_swaped is set).
    }
    begin
      if use_sse(left.resulttype.def) or use_sse(right.resulttype.def) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

{$ifndef x86_64}
      if aktcputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { transfer the fpu status word into the cpu flags via AX }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, remove the second one too }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  870. {*****************************************************************************
  871. Add64bit
  872. *****************************************************************************}
  procedure tx86addnode.second_add64bit;
    { 64 bit arithmetic: identical to the ordinal case when cpu64bit is
      defined; otherwise this class has no implementation for it. }
    begin
{$ifndef cpu64bit}
      { has to be implemented separately }
      internalerror(200402042);
{$else cpu64bit}
      second_addordinal;
{$endif cpu64bit}
    end;
  procedure tx86addnode.second_cmp64bit;
    { 64 bit comparison: identical to the ordinal case when cpu64bit is
      defined; otherwise this class has no implementation for it. }
    begin
{$ifndef cpu64bit}
      { has to be implemented separately }
      internalerror(200402043);
{$else cpu64bit}
      second_cmpordinal;
{$endif cpu64bit}
    end;
  891. {*****************************************************************************
  892. AddOrdinal
  893. *****************************************************************************}
  procedure tx86addnode.second_addordinal;
    { Ordinal arithmetic. A multiplication with at least one unsigned
      operand needs special handling and goes to second_mul; everything
      else is left to the inherited implementation. }
    begin
      if (nodetype=muln) and
         not(is_signed(left.resulttype.def) and
             is_signed(right.resulttype.def)) then
        second_mul
      else
        inherited second_addordinal;
    end;
  procedure tx86addnode.second_cmpordinal;
    { Ordinal comparison: emits a CMP (or one of the shortcuts of
      emit_generic_code) and returns the result as a LOC_FLAGS location.
      The comparison counts as unsigned as soon as one operand is not
      signed; the operand size is taken from the left operand. }
    var
      cmpsize     : tcgsize;
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def) and
                       is_signed(right.resulttype.def));
      cmpsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are not needed any longer once the flags are set }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  begin
    { unit initialization: make tx86addnode the class stored in caddnode }
    caddnode:=tx86addnode;
  end.