nx86add.pas 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  25. tx86addnode = class(tcgaddnode)
  26. protected
  27. function getresflags(unsigned : boolean) : tresflags;
  28. procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
  29. procedure check_left_and_right_fpureg(force_fpureg: boolean);
  30. procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
  31. procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
  32. procedure second_cmpfloatsse;
  33. procedure second_addfloatsse;
  34. procedure second_mul;virtual;abstract;
  35. public
  36. procedure second_addfloat;override;
  37. procedure second_addsmallset;override;
  38. procedure second_add64bit;override;
  39. procedure second_addordinal;override;
  40. procedure second_cmpfloat;override;
  41. procedure second_cmpsmallset;override;
  42. procedure second_cmp64bit;override;
  43. procedure second_cmpordinal;override;
  44. {$ifdef SUPPORT_MMX}
  45. procedure second_opmmxset;override;
  46. procedure second_opmmx;override;
  47. {$endif SUPPORT_MMX}
  48. procedure second_opvector;override;
  49. end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  64. procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
  65. var
  66. power : longint;
  67. hl4 : tasmlabel;
  68. r : Tregister;
  69. begin
  70. { at this point, left.location.loc should be LOC_REGISTER }
  71. if right.location.loc=LOC_REGISTER then
  72. begin
  73. { right.location is a LOC_REGISTER }
  74. { when swapped another result register }
  75. if (nodetype=subn) and (nf_swapped in flags) then
  76. begin
  77. if extra_not then
  78. emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
  79. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
  80. { newly swapped also set swapped flag }
  81. location_swap(left.location,right.location);
  82. toggleflag(nf_swapped);
  83. end
  84. else
  85. begin
  86. if extra_not then
  87. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  88. if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
  89. location_swap(left.location,right.location);
  90. emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
  91. end;
  92. end
  93. else
  94. begin
  95. { right.location is not a LOC_REGISTER }
  96. if (nodetype=subn) and (nf_swapped in flags) then
  97. begin
  98. if extra_not then
  99. cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
  100. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  101. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r);
  102. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
  103. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
  104. end
  105. else
  106. begin
  107. { Optimizations when right.location is a constant value }
  108. if (op=A_CMP) and
  109. (nodetype in [equaln,unequaln]) and
  110. (right.location.loc=LOC_CONSTANT) and
  111. (right.location.value=0) then
  112. begin
  113. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  114. end
  115. else
  116. if (op=A_ADD) and
  117. (right.location.loc=LOC_CONSTANT) and
  118. (right.location.value=1) and
  119. not(cs_check_overflow in current_settings.localswitches) then
  120. begin
  121. emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
  122. end
  123. else
  124. if (op=A_SUB) and
  125. (right.location.loc=LOC_CONSTANT) and
  126. (right.location.value=1) and
  127. not(cs_check_overflow in current_settings.localswitches) then
  128. begin
  129. emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
  130. end
  131. else
  132. if (op=A_IMUL) and
  133. (right.location.loc=LOC_CONSTANT) and
  134. (ispowerof2(int64(right.location.value),power)) and
  135. not(cs_check_overflow in current_settings.localswitches) then
  136. begin
  137. emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
  138. end
  139. else
  140. begin
  141. if extra_not then
  142. begin
  143. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  144. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r);
  145. emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
  146. emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
  147. end
  148. else
  149. begin
  150. emit_op_right_left(op,opsize);
  151. end;
  152. end;
  153. end;
  154. end;
  155. { only in case of overflow operations }
  156. { produce overflow code }
  157. { we must put it here directly, because sign of operation }
  158. { is in unsigned VAR!! }
  159. if mboverflow then
  160. begin
  161. if cs_check_overflow in current_settings.localswitches then
  162. begin
  163. current_asmdata.getjumplabel(hl4);
  164. if unsigned then
  165. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  166. else
  167. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  168. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
  169. cg.a_label(current_asmdata.CurrAsmList,hl4);
  170. end;
  171. end;
  172. end;
  173. procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
  174. begin
  175. { left location is not a register? }
  176. if (left.location.loc<>LOC_REGISTER) then
  177. begin
  178. { if right is register then we can swap the locations }
  179. if (not noswap) and
  180. (right.location.loc=LOC_REGISTER) then
  181. begin
  182. location_swap(left.location,right.location);
  183. toggleflag(nf_swapped);
  184. end
  185. else
  186. begin
  187. { maybe we can reuse a constant register when the
  188. operation is a comparison that doesn't change the
  189. value of the register }
  190. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
  191. end;
  192. end;
  193. if (right.location.loc<>LOC_CONSTANT) and
  194. (tcgsize2unsigned[right.location.size]<>opsize) then
  195. location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
  196. if (left.location.loc<>LOC_CONSTANT) and
  197. (tcgsize2unsigned[left.location.size]<>opsize) then
  198. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
  199. end;
  200. procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
  201. begin
  202. if (right.location.loc<>LOC_FPUREGISTER) then
  203. begin
  204. if (force_fpureg) then
  205. begin
  206. location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
  207. if (left.location.loc<>LOC_FPUREGISTER) then
  208. location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
  209. else
  210. { left was on the stack => swap }
  211. toggleflag(nf_swapped);
  212. end
  213. end
  214. { the nominator in st0 }
  215. else if (left.location.loc<>LOC_FPUREGISTER) then
  216. begin
  217. if (force_fpureg) then
  218. location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
  219. end
  220. else
  221. begin
  222. { fpu operands are always in the wrong order on the stack }
  223. toggleflag(nf_swapped);
  224. end;
  225. end;
  226. procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
  227. {$ifdef x86_64}
  228. var
  229. tmpreg : tregister;
  230. {$endif x86_64}
  231. begin
  232. if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
  233. location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);
  234. { left must be a register }
  235. case right.location.loc of
  236. LOC_REGISTER,
  237. LOC_CREGISTER :
  238. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
  239. LOC_REFERENCE,
  240. LOC_CREFERENCE :
  241. begin
  242. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  243. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
  244. end;
  245. LOC_CONSTANT :
  246. begin
  247. {$ifdef x86_64}
  248. { x86_64 only supports signed 32 bits constants directly }
  249. if (opsize in [OS_S64,OS_64]) and
  250. ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
  251. begin
  252. tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  253. cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
  254. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
  255. end
  256. else
  257. {$endif x86_64}
  258. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
  259. end;
  260. else
  261. internalerror(200203232);
  262. end;
  263. end;
  264. function tx86addnode.getresflags(unsigned : boolean) : tresflags;
  265. begin
  266. case nodetype of
  267. equaln : getresflags:=F_E;
  268. unequaln : getresflags:=F_NE;
  269. else
  270. if not(unsigned) then
  271. begin
  272. if nf_swapped in flags then
  273. case nodetype of
  274. ltn : getresflags:=F_G;
  275. lten : getresflags:=F_GE;
  276. gtn : getresflags:=F_L;
  277. gten : getresflags:=F_LE;
  278. end
  279. else
  280. case nodetype of
  281. ltn : getresflags:=F_L;
  282. lten : getresflags:=F_LE;
  283. gtn : getresflags:=F_G;
  284. gten : getresflags:=F_GE;
  285. end;
  286. end
  287. else
  288. begin
  289. if nf_swapped in flags then
  290. case nodetype of
  291. ltn : getresflags:=F_A;
  292. lten : getresflags:=F_AE;
  293. gtn : getresflags:=F_B;
  294. gten : getresflags:=F_BE;
  295. end
  296. else
  297. case nodetype of
  298. ltn : getresflags:=F_B;
  299. lten : getresflags:=F_BE;
  300. gtn : getresflags:=F_A;
  301. gten : getresflags:=F_AE;
  302. end;
  303. end;
  304. end;
  305. end;
  306. {*****************************************************************************
  307. AddSmallSet
  308. *****************************************************************************}
  309. procedure tx86addnode.second_addsmallset;
  310. var
  311. opsize : TCGSize;
  312. op : TAsmOp;
  313. extra_not,
  314. noswap : boolean;
  315. all_member_optimization:boolean;
  316. begin
  317. pass_left_right;
  318. noswap:=false;
  319. extra_not:=false;
  320. all_member_optimization:=false;
  321. opsize:=int_cgsize(resultdef.size);
  322. case nodetype of
  323. addn :
  324. begin
  325. { adding elements is not commutative }
  326. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  327. swapleftright;
  328. { are we adding set elements ? }
  329. if right.nodetype=setelementn then
  330. begin
  331. { no range support for smallsets! }
  332. if assigned(tsetelementnode(right).right) then
  333. internalerror(43244);
  334. { btsb isn't supported }
  335. if opsize=OS_8 then
  336. opsize:=OS_32;
  337. { bts requires both elements to be registers }
  338. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
  339. location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
  340. op:=A_BTS;
  341. noswap:=true;
  342. end
  343. else
  344. op:=A_OR;
  345. end;
  346. symdifn :
  347. op:=A_XOR;
  348. muln :
  349. op:=A_AND;
  350. subn :
  351. begin
  352. op:=A_AND;
  353. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  354. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  355. all_member_optimization:=true;
  356. if (not(nf_swapped in flags)) and
  357. (right.location.loc=LOC_CONSTANT) then
  358. right.location.value := not(right.location.value)
  359. else if (nf_swapped in flags) and
  360. (left.location.loc=LOC_CONSTANT) then
  361. left.location.value := not(left.location.value)
  362. else
  363. extra_not:=true;
  364. end;
  365. xorn :
  366. op:=A_XOR;
  367. orn :
  368. op:=A_OR;
  369. andn :
  370. op:=A_AND;
  371. else
  372. internalerror(2003042215);
  373. end;
  374. if all_member_optimization then
  375. begin
  376. {A set expression [0..31]-x can be implemented with a simple NOT.}
  377. if nf_swapped in flags then
  378. begin
  379. { newly swapped also set swapped flag }
  380. location_swap(left.location,right.location);
  381. toggleflag(nf_swapped);
  382. end;
  383. location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
  384. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  385. location:=right.location;
  386. end
  387. else
  388. begin
  389. { left must be a register }
  390. left_must_be_reg(opsize,noswap);
  391. emit_generic_code(op,opsize,true,extra_not,false);
  392. location_freetemp(current_asmdata.CurrAsmList,right.location);
  393. { left is always a register and contains the result }
  394. location:=left.location;
  395. end;
  396. { fix the changed opsize we did above because of the missing btsb }
  397. if opsize<>int_cgsize(resultdef.size) then
  398. location_force_reg(current_asmdata.CurrAsmList,location,int_cgsize(resultdef.size),false);
  399. end;
  400. procedure tx86addnode.second_cmpsmallset;
  401. var
  402. opsize : TCGSize;
  403. op : TAsmOp;
  404. begin
  405. pass_left_right;
  406. opsize:=int_cgsize(left.resultdef.size);
  407. case nodetype of
  408. equaln,
  409. unequaln :
  410. op:=A_CMP;
  411. lten,gten:
  412. begin
  413. if (not(nf_swapped in flags) and (nodetype = lten)) or
  414. ((nf_swapped in flags) and (nodetype = gten)) then
  415. swapleftright;
  416. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
  417. emit_op_right_left(A_AND,opsize);
  418. op:=A_CMP;
  419. { warning: ugly hack, we need a JE so change the node to equaln }
  420. nodetype:=equaln;
  421. end;
  422. else
  423. internalerror(2003042215);
  424. end;
  425. { left must be a register }
  426. left_must_be_reg(opsize,false);
  427. emit_generic_code(op,opsize,true,false,false);
  428. location_freetemp(current_asmdata.CurrAsmList,right.location);
  429. location_freetemp(current_asmdata.CurrAsmList,left.location);
  430. location_reset(location,LOC_FLAGS,OS_NO);
  431. location.resflags:=getresflags(true);
  432. end;
  433. {*****************************************************************************
  434. AddMMX
  435. *****************************************************************************}
  436. {$ifdef SUPPORT_MMX}
  437. procedure tx86addnode.second_opmmx;
  438. var
  439. op : TAsmOp;
  440. cmpop : boolean;
  441. mmxbase : tmmxtype;
  442. hreg,
  443. hregister : tregister;
  444. begin
  445. pass_left_right;
  446. cmpop:=false;
  447. mmxbase:=mmx_type(left.resultdef);
  448. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  449. case nodetype of
  450. addn :
  451. begin
  452. if (cs_mmx_saturation in current_settings.localswitches) then
  453. begin
  454. case mmxbase of
  455. mmxs8bit:
  456. op:=A_PADDSB;
  457. mmxu8bit:
  458. op:=A_PADDUSB;
  459. mmxs16bit,mmxfixed16:
  460. op:=A_PADDSW;
  461. mmxu16bit:
  462. op:=A_PADDUSW;
  463. end;
  464. end
  465. else
  466. begin
  467. case mmxbase of
  468. mmxs8bit,mmxu8bit:
  469. op:=A_PADDB;
  470. mmxs16bit,mmxu16bit,mmxfixed16:
  471. op:=A_PADDW;
  472. mmxs32bit,mmxu32bit:
  473. op:=A_PADDD;
  474. end;
  475. end;
  476. end;
  477. muln :
  478. begin
  479. case mmxbase of
  480. mmxs16bit,mmxu16bit:
  481. op:=A_PMULLW;
  482. mmxfixed16:
  483. op:=A_PMULHW;
  484. end;
  485. end;
  486. subn :
  487. begin
  488. if (cs_mmx_saturation in current_settings.localswitches) then
  489. begin
  490. case mmxbase of
  491. mmxs8bit:
  492. op:=A_PSUBSB;
  493. mmxu8bit:
  494. op:=A_PSUBUSB;
  495. mmxs16bit,mmxfixed16:
  496. op:=A_PSUBSB;
  497. mmxu16bit:
  498. op:=A_PSUBUSW;
  499. end;
  500. end
  501. else
  502. begin
  503. case mmxbase of
  504. mmxs8bit,mmxu8bit:
  505. op:=A_PSUBB;
  506. mmxs16bit,mmxu16bit,mmxfixed16:
  507. op:=A_PSUBW;
  508. mmxs32bit,mmxu32bit:
  509. op:=A_PSUBD;
  510. end;
  511. end;
  512. end;
  513. xorn:
  514. op:=A_PXOR;
  515. orn:
  516. op:=A_POR;
  517. andn:
  518. op:=A_PAND;
  519. else
  520. internalerror(2003042214);
  521. end;
  522. { left and right no register? }
  523. { then one must be demanded }
  524. if (left.location.loc<>LOC_MMXREGISTER) then
  525. begin
  526. if (right.location.loc=LOC_MMXREGISTER) then
  527. begin
  528. location_swap(left.location,right.location);
  529. toggleflag(nf_swapped);
  530. end
  531. else
  532. begin
  533. { register variable ? }
  534. if (left.location.loc=LOC_CMMXREGISTER) then
  535. begin
  536. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  537. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  538. end
  539. else
  540. begin
  541. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  542. internalerror(200203245);
  543. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  544. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  545. end;
  546. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  547. left.location.register:=hregister;
  548. end;
  549. end;
  550. { at this point, left.location.loc should be LOC_MMXREGISTER }
  551. if right.location.loc<>LOC_MMXREGISTER then
  552. begin
  553. if (nodetype=subn) and (nf_swapped in flags) then
  554. begin
  555. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  556. if right.location.loc=LOC_CMMXREGISTER then
  557. begin
  558. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  559. emit_reg_reg(op,S_NO,left.location.register,hreg);
  560. end
  561. else
  562. begin
  563. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  564. internalerror(200203247);
  565. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  566. emit_reg_reg(op,S_NO,left.location.register,hreg);
  567. end;
  568. location.register:=hreg;
  569. end
  570. else
  571. begin
  572. if (right.location.loc=LOC_CMMXREGISTER) then
  573. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  574. else
  575. begin
  576. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  577. internalerror(200203246);
  578. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  579. end;
  580. location.register:=left.location.register;
  581. end;
  582. end
  583. else
  584. begin
  585. { right.location=LOC_MMXREGISTER }
  586. if (nodetype=subn) and (nf_swapped in flags) then
  587. begin
  588. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  589. location_swap(left.location,right.location);
  590. toggleflag(nf_swapped);
  591. end
  592. else
  593. begin
  594. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  595. end;
  596. location.register:=left.location.register;
  597. end;
  598. location_freetemp(current_asmdata.CurrAsmList,right.location);
  599. if cmpop then
  600. location_freetemp(current_asmdata.CurrAsmList,left.location);
  601. end;
  602. {$endif SUPPORT_MMX}
  603. {*****************************************************************************
  604. addmmxset
  605. *****************************************************************************}
  606. {$ifdef SUPPORT_MMX}
  607. procedure tx86addnode.second_opmmxset;
  608. var opsize : TCGSize;
  609. op : TAsmOp;
  610. cmpop,
  611. noswap : boolean;
  612. begin
  613. pass_left_right;
  614. cmpop:=false;
  615. noswap:=false;
  616. opsize:=OS_32;
  617. case nodetype of
  618. addn:
  619. begin
  620. { are we adding set elements ? }
  621. if right.nodetype=setelementn then
  622. begin
  623. { adding elements is not commutative }
  624. { if nf_swapped in flags then
  625. swapleftright;}
  626. { bts requires both elements to be registers }
  627. { location_force_reg(current_asmdata.CurrAsmList,left.location,opsize_2_cgsize[opsize],false);
  628. location_force_reg(current_asmdata.CurrAsmList,right.location,opsize_2_cgsize[opsize],true);
  629. op:=A_BTS;
  630. noswap:=true;}
  631. end
  632. else
  633. op:=A_POR;
  634. end;
  635. symdifn :
  636. op:=A_PXOR;
  637. muln:
  638. op:=A_PAND;
  639. subn:
  640. op:=A_PANDN;
  641. equaln,
  642. unequaln :
  643. begin
  644. op:=A_PCMPEQD;
  645. cmpop:=true;
  646. end;
  647. lten,gten:
  648. begin
  649. if (not(nf_swapped in flags) and (nodetype = lten)) or
  650. ((nf_swapped in flags) and (nodetype = gten)) then
  651. swapleftright;
  652. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
  653. emit_op_right_left(A_AND,opsize);
  654. op:=A_PCMPEQD;
  655. cmpop:=true;
  656. { warning: ugly hack, we need a JE so change the node to equaln }
  657. nodetype:=equaln;
  658. end;
  659. xorn :
  660. op:=A_PXOR;
  661. orn :
  662. op:=A_POR;
  663. andn :
  664. op:=A_PAND;
  665. else
  666. internalerror(2003042215);
  667. end;
  668. { left must be a register }
  669. left_must_be_reg(opsize,noswap);
  670. { emit_generic_code(op,opsize,true,extra_not,false);}
  671. location_freetemp(current_asmdata.CurrAsmList,right.location);
  672. if cmpop then
  673. location_freetemp(current_asmdata.CurrAsmList,left.location);
  674. end;
  675. {$endif SUPPORT_MMX}
  676. {*****************************************************************************
  677. AddFloat
  678. *****************************************************************************}
  679. procedure tx86addnode.second_addfloatsse;
  680. var
  681. op : topcg;
  682. begin
  683. pass_left_right;
  684. check_left_and_right_fpureg(false);
  685. if (nf_swapped in flags) then
  686. { can't use swapleftright if both are on the fpu stack, since then }
  687. { both are "R_ST" -> nothing would change -> manually switch }
  688. if (left.location.loc = LOC_FPUREGISTER) and
  689. (right.location.loc = LOC_FPUREGISTER) then
  690. emit_none(A_FXCH,S_NO)
  691. else
  692. swapleftright;
  693. case nodetype of
  694. addn :
  695. op:=OP_ADD;
  696. muln :
  697. op:=OP_MUL;
  698. subn :
  699. op:=OP_SUB;
  700. slashn :
  701. op:=OP_DIV;
  702. else
  703. internalerror(200312231);
  704. end;
  705. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  706. { we can use only right as left operand if the operation is commutative }
  707. if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  708. begin
  709. location.register:=right.location.register;
  710. { force floating point reg. location to be written to memory,
  711. we don't force it to mm register because writing to memory
  712. allows probably shorter code because there is no direct fpu->mm register
  713. copy instruction
  714. }
  715. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  716. location_force_mem(current_asmdata.CurrAsmList,left.location);
  717. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  718. end
  719. else
  720. begin
  721. if (nf_swapped in flags) then
  722. swapleftright;
  723. location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
  724. location.register:=left.location.register;
  725. { force floating point reg. location to be written to memory,
  726. we don't force it to mm register because writing to memory
  727. allows probably shorter code because there is no direct fpu->mm register
  728. copy instruction
  729. }
  730. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  731. location_force_mem(current_asmdata.CurrAsmList,right.location);
  732. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  733. end;
  734. end;
  735. procedure tx86addnode.second_cmpfloatsse;
  736. var
  737. op : tasmop;
  738. begin
  739. if is_single(left.resultdef) then
  740. op:=A_COMISS
  741. else if is_double(left.resultdef) then
  742. op:=A_COMISD
  743. else
  744. internalerror(200402222);
  745. pass_left_right;
  746. location_reset(location,LOC_FLAGS,def_cgsize(resultdef));
  747. { we can use only right as left operand if the operation is commutative }
  748. if (right.location.loc=LOC_MMREGISTER) then
  749. begin
  750. { force floating point reg. location to be written to memory,
  751. we don't force it to mm register because writing to memory
  752. allows probably shorter code because there is no direct fpu->mm register
  753. copy instruction
  754. }
  755. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  756. location_force_mem(current_asmdata.CurrAsmList,left.location);
  757. case left.location.loc of
  758. LOC_REFERENCE,LOC_CREFERENCE:
  759. begin
  760. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  761. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
  762. end;
  763. LOC_MMREGISTER,LOC_CMMREGISTER:
  764. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
  765. else
  766. internalerror(200402221);
  767. end;
  768. if nf_swapped in flags then
  769. exclude(flags,nf_swapped)
  770. else
  771. include(flags,nf_swapped)
  772. end
  773. else
  774. begin
  775. location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
  776. { force floating point reg. location to be written to memory,
  777. we don't force it to mm register because writing to memory
  778. allows probably shorter code because there is no direct fpu->mm register
  779. copy instruction
  780. }
  781. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  782. location_force_mem(current_asmdata.CurrAsmList,right.location);
  783. case right.location.loc of
  784. LOC_REFERENCE,LOC_CREFERENCE:
  785. begin
  786. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  787. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
  788. end;
  789. LOC_MMREGISTER,LOC_CMMREGISTER:
  790. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
  791. else
  792. internalerror(200402223);
  793. end;
  794. end;
  795. location.resflags:=getresflags(true);
  796. end;
  797. procedure tx86addnode.second_opvector;
  798. var
  799. op : topcg;
  800. begin
  801. pass_left_right;
  802. if (nf_swapped in flags) then
  803. swapleftright;
  804. case nodetype of
  805. addn :
  806. op:=OP_ADD;
  807. muln :
  808. op:=OP_MUL;
  809. subn :
  810. op:=OP_SUB;
  811. slashn :
  812. op:=OP_DIV;
  813. else
  814. internalerror(200610071);
  815. end;
  816. if fits_in_mm_register(left.resultdef) then
  817. begin
  818. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  819. { we can use only right as left operand if the operation is commutative }
  820. if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  821. begin
  822. location.register:=right.location.register;
  823. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
  824. end
  825. else
  826. begin
  827. location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
  828. location.register:=left.location.register;
  829. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
  830. tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
  831. end;
  832. end
  833. else
  834. begin
  835. { not yet supported }
  836. internalerror(200610072);
  837. end
  838. end;
  839. procedure tx86addnode.second_addfloat;
  840. var
  841. op : TAsmOp;
  842. begin
  843. if use_sse(resultdef) then
  844. begin
  845. second_addfloatsse;
  846. exit;
  847. end;
  848. pass_left_right;
  849. case nodetype of
  850. addn :
  851. op:=A_FADDP;
  852. muln :
  853. op:=A_FMULP;
  854. subn :
  855. op:=A_FSUBP;
  856. slashn :
  857. op:=A_FDIVP;
  858. else
  859. internalerror(2003042214);
  860. end;
  861. check_left_and_right_fpureg(true);
  862. { if we swaped the tree nodes, then use the reverse operator }
  863. if nf_swapped in flags then
  864. begin
  865. if (nodetype=slashn) then
  866. op:=A_FDIVRP
  867. else if (nodetype=subn) then
  868. op:=A_FSUBRP;
  869. end;
  870. emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
  871. tcgx86(cg).dec_fpu_stack;
  872. location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
  873. location.register:=NR_ST;
  874. end;
  875. procedure tx86addnode.second_cmpfloat;
  876. var
  877. resflags : tresflags;
  878. begin
  879. if use_sse(left.resultdef) or use_sse(right.resultdef) then
  880. begin
  881. second_cmpfloatsse;
  882. exit;
  883. end;
  884. pass_left_right;
  885. check_left_and_right_fpureg(true);
  886. {$ifndef x86_64}
  887. if current_settings.cputype<cpu_Pentium2 then
  888. begin
  889. emit_none(A_FCOMPP,S_NO);
  890. tcgx86(cg).dec_fpu_stack;
  891. tcgx86(cg).dec_fpu_stack;
  892. { load fpu flags }
  893. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  894. emit_reg(A_FNSTSW,S_NO,NR_AX);
  895. emit_none(A_SAHF,S_NO);
  896. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  897. if nf_swapped in flags then
  898. begin
  899. case nodetype of
  900. equaln : resflags:=F_E;
  901. unequaln : resflags:=F_NE;
  902. ltn : resflags:=F_A;
  903. lten : resflags:=F_AE;
  904. gtn : resflags:=F_B;
  905. gten : resflags:=F_BE;
  906. end;
  907. end
  908. else
  909. begin
  910. case nodetype of
  911. equaln : resflags:=F_E;
  912. unequaln : resflags:=F_NE;
  913. ltn : resflags:=F_B;
  914. lten : resflags:=F_BE;
  915. gtn : resflags:=F_A;
  916. gten : resflags:=F_AE;
  917. end;
  918. end;
  919. end
  920. else
  921. {$endif x86_64}
  922. begin
  923. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
  924. { fcomip pops only one fpu register }
  925. current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
  926. tcgx86(cg).dec_fpu_stack;
  927. tcgx86(cg).dec_fpu_stack;
  928. { load fpu flags }
  929. if nf_swapped in flags then
  930. begin
  931. case nodetype of
  932. equaln : resflags:=F_E;
  933. unequaln : resflags:=F_NE;
  934. ltn : resflags:=F_A;
  935. lten : resflags:=F_AE;
  936. gtn : resflags:=F_B;
  937. gten : resflags:=F_BE;
  938. end;
  939. end
  940. else
  941. begin
  942. case nodetype of
  943. equaln : resflags:=F_E;
  944. unequaln : resflags:=F_NE;
  945. ltn : resflags:=F_B;
  946. lten : resflags:=F_BE;
  947. gtn : resflags:=F_A;
  948. gten : resflags:=F_AE;
  949. end;
  950. end;
  951. end;
  952. location_reset(location,LOC_FLAGS,OS_NO);
  953. location.resflags:=resflags;
  954. end;
  955. {*****************************************************************************
  956. Add64bit
  957. *****************************************************************************}
  958. procedure tx86addnode.second_add64bit;
  959. begin
  960. {$ifdef cpu64bit}
  961. second_addordinal;
  962. {$else cpu64bit}
  963. { must be implemented separate }
  964. internalerror(200402042);
  965. {$endif cpu64bit}
  966. end;
  967. procedure tx86addnode.second_cmp64bit;
  968. begin
  969. {$ifdef cpu64bit}
  970. second_cmpordinal;
  971. {$else cpu64bit}
  972. { must be implemented separate }
  973. internalerror(200402043);
  974. {$endif cpu64bit}
  975. end;
  976. {*****************************************************************************
  977. AddOrdinal
  978. *****************************************************************************}
  979. procedure tx86addnode.second_addordinal;
  980. begin
  981. { filter unsigned MUL opcode, which requires special handling }
  982. if (nodetype=muln) and
  983. (not(is_signed(left.resultdef)) or
  984. not(is_signed(right.resultdef))) then
  985. begin
  986. second_mul;
  987. exit;
  988. end;
  989. inherited second_addordinal;
  990. end;
  991. procedure tx86addnode.second_cmpordinal;
  992. var
  993. opsize : tcgsize;
  994. unsigned : boolean;
  995. begin
  996. unsigned:=not(is_signed(left.resultdef)) or
  997. not(is_signed(right.resultdef));
  998. opsize:=def_cgsize(left.resultdef);
  999. pass_left_right;
  1000. left_must_be_reg(opsize,false);
  1001. emit_generic_code(A_CMP,opsize,unsigned,false,false);
  1002. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1003. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1004. location_reset(location,LOC_FLAGS,OS_NO);
  1005. location.resflags:=getresflags(unsigned);
  1006. end;
  1007. begin
  1008. caddnode:=tx86addnode;
  1009. end.