2
0

nx86add.pas 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Add node (arithmetic, set and comparison operators) code generation
    shared by the i386 and x86 targets; extends tcgaddnode. }
  tx86addnode = class(tcgaddnode)
  protected
    { maps the comparison node type, taking nf_swapped into account, to the
      cpu flag condition; "unsigned" selects the above/below conditions }
    function  getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location ends up in a register of size opsize; unless
      noswap is set the operands may be exchanged to achieve that }
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { optionally loads both operands on the fpu stack and keeps nf_swapped
      in sync with their order }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);
    { emits "op right,left"; left must already be a register }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the instruction(s) for an integer/set operation plus, when
      requested, the overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { SSE variants used by second_cmpfloat and second_addfloat }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;
  public
    procedure second_addfloat;override;
    procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  47. implementation
  48. uses
  49. globtype,globals,
  50. verbose,cutils,
  51. cpuinfo,
  52. aasmbase,aasmtai,aasmdata,aasmcpu,
  53. symconst,symdef,
  54. cgobj,cgx86,cga,cgutils,
  55. paramgr,tgobj,ncgutil,
  56. ncon,nset,
  57. defutil;
  58. {*****************************************************************************
  59. Helpers
  60. *****************************************************************************}
  { Emits the machine instruction(s) for "left := left op right".

    op         - x86 opcode to emit
    opsize     - operand size
    unsigned   - selects the carry based instead of the overflow based
                 overflow test
    extra_not  - the subtrahend must be inverted first (set difference)
    mboverflow - the operation may overflow; when overflow checking is
                 enabled a call to FPC_OVERFLOW is generated

    On entry left.location.loc is expected to be LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount : longint;
      ok_label   : tasmlabel;
      scratch    : Tregister;

    { Emits a cheaper instruction when the right operand is a suitable
      constant. Returns true when an instruction was emitted. }
    function emit_const_shortcut : boolean;
      begin
        emit_const_shortcut:=false;
        if right.location.loc<>LOC_CONSTANT then
          exit;
        if op=A_CMP then
          begin
            { (in)equality test against zero: test reg,reg }
            if (nodetype in [equaln,unequaln]) and
               (right.location.value=0) then
              begin
                emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                emit_const_shortcut:=true;
              end;
            exit;
          end;
        { none of the remaining shortcuts is used with overflow checking on }
        if cs_check_overflow in current_settings.localswitches then
          exit;
        if (op=A_ADD) and (right.location.value=1) then
          begin
            emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
            emit_const_shortcut:=true;
          end
        else if (op=A_SUB) and (right.location.value=1) then
          begin
            emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
            emit_const_shortcut:=true;
          end
        else if (op=A_IMUL) and
                ispowerof2(int64(right.location.value),shiftcount) then
          begin
            emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shiftcount,left.location.register);
            emit_const_shortcut:=true;
          end;
      end;

    begin
      if (nodetype=subn) and (nf_swapped in flags) then
        begin
          { swapped subtraction: the result is built in the right operand }
          if right.location.loc=LOC_REGISTER then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { the result register becomes "left", so the swapped state
                has to be toggled as well }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate on it and copy
                the result back into left's register }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
            end;
        end
      else if right.location.loc=LOC_REGISTER then
        begin
          if extra_not then
            emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          { for these opcodes the operand locations are exchanged before
            the instruction is emitted }
          case op of
            A_ADD,A_OR,A_AND,A_XOR,A_IMUL :
              location_swap(left.location,right.location);
          end;
          emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
        end
      else if not emit_const_shortcut then
        begin
          if extra_not then
            begin
              { left := left and not(right), using a scratch copy of right }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
              emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
              emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
            end
          else
            emit_op_right_left(op,opsize);
        end;

      { The overflow check has to be generated right here because the
        signedness of the operation is only known through "unsigned". }
      if mboverflow and
         (cs_check_overflow in current_settings.localswitches) then
        begin
          current_asmdata.getjumplabel(ok_label);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,ok_label)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,ok_label);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,ok_label);
        end;
    end;
  { Ensures that left.location is a LOC_REGISTER and that non-constant
    operands whose size differs from opsize are (re)loaded with opsize.

    opsize - size the operation is carried out in
    noswap - when true the operands are never exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    var
      is_compare : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if (right.location.loc=LOC_REGISTER) and not(noswap) then
            begin
              { right already lives in a register: simply exchange them }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { a comparison does not modify its operand, so a constant
                register may be reused in that case }
              is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,is_compare);
            end;
        end;

      { bring operands with a deviating size to opsize }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>opsize) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>opsize) then
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
    end;
  { Checks whether the operands are on the fpu stack and, when
    force_fpureg is set, loads the missing one(s). nf_swapped is toggled
    whenever the resulting stack order is the reverse one. }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    begin
      { both already on the stack: fpu operands are always in the wrong
        order there }
      if (right.location.loc=LOC_FPUREGISTER) and
         (left.location.loc=LOC_FPUREGISTER) then
        begin
          toggleflag(nf_swapped);
          exit;
        end;

      { at least one operand is missing; nothing to do unless loading
        was requested }
      if not force_fpureg then
        exit;

      if right.location.loc<>LOC_FPUREGISTER then
        begin
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left.location.loc<>LOC_FPUREGISTER then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
          else
            { left was on the stack => swap }
            toggleflag(nf_swapped);
        end
      else
        { right is on the stack, left is not: load left on top of it }
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
    end;
  { Emits "op right,left" where left.location must be a register and
    right may be a register, a memory reference or a constant.
    Subset register/reference locations of right are loaded into a
    register first. Any other location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);

      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only accepts signed 32 bit immediates, anything else
              has to go through a register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and
                   (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag condition that represents the comparison of this
    node. unsigned selects the above/below conditions instead of the
    greater/less ones; when nf_swapped is set the mirrored condition is
    returned. The result is only assigned for equaln, unequaln, ltn,
    lten, gtn and gten. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            begin
              if swapped then getresflags:=F_A else getresflags:=F_B;
            end
          else
            begin
              if swapped then getresflags:=F_G else getresflags:=F_L;
            end;
        lten :
          if unsigned then
            begin
              if swapped then getresflags:=F_AE else getresflags:=F_BE;
            end
          else
            begin
              if swapped then getresflags:=F_GE else getresflags:=F_LE;
            end;
        gtn :
          if unsigned then
            begin
              if swapped then getresflags:=F_B else getresflags:=F_A;
            end
          else
            begin
              if swapped then getresflags:=F_L else getresflags:=F_G;
            end;
        gten :
          if unsigned then
            begin
              if swapped then getresflags:=F_BE else getresflags:=F_AE;
            end
          else
            begin
              if swapped then getresflags:=F_LE else getresflags:=F_GE;
            end;
      end;
    end;
  303. {*****************************************************************************
  304. AddSmallSet
  305. *****************************************************************************}
  { Generates code for the operators on small sets: union / element
    inclusion (addn), symmetric difference, intersection, difference and
    the bitwise xor/or/and node types. The result ends up in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase   : aint;
      opsize,
      ressize   : TCGSize;
      op        : TAsmOp;
      extra_not,
      noswap,
      all_member_optimization : boolean;
    begin
      pass_left_right;
      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      ressize:=int_cgsize(resultdef.size);
      opsize:=ressize;

      { take the set base from whichever operand is the set }
      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { a single element is added; ranges are not supported for
                  small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { there is no byte sized bts }
                if opsize=OS_8 then
                  opsize:=OS_32;
                { bts needs both operands in registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b); which location holds the
              minuend/subtrahend depends on nf_swapped }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                all_member_optimization:=(right.location.loc=LOC_CONSTANT) and
                                         (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                all_member_optimization:=(left.location.loc=LOC_CONSTANT) and
                                         (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not all_member_optimization then
        begin
          { left must be a register }
          left_must_be_reg(opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { exchanging the locations also flips the swapped state }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { undo the size change made above because of the missing btsb }
      if opsize<>ressize then
        location_force_reg(current_asmdata.CurrAsmList,location,ressize,false);
    end;
  { Generates code for comparing two small sets (=, <>, <=, >=).
    The result is delivered in the cpu flags. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
    begin
      pass_left_right;
      opsize:=int_cgsize(left.resultdef.size);

      case nodetype of
        equaln,
        unequaln :
          { a plain compare is sufficient }
          ;
        lten,
        gten :
          begin
            { swap for "<=" when not yet swapped and for ">=" when already
              swapped, i.e. when both conditions below agree }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack, a JE is needed so the node is turned
              into an equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  436. {*****************************************************************************
  437. AddMMX
  438. *****************************************************************************}
  439. {$ifdef SUPPORT_MMX}
  { Generates code for MMX operations (add, multiply, subtract, xor, or,
    and). The result is returned in an MMX register.

    Changes compared to the previous version:
    - saturated subtraction of signed 16 bit words now uses PSUBSW; it
      used the byte instruction PSUBSB before
    - in the swapped subtraction path the location check is done on the
      right operand, which is the one being loaded; left is already an
      MMX register there, so checking left always raised internalerror
      200203247
    - the "cmpop" local, which was always false, and the code depending
      on it have been removed

    NOTE(review): the "case mmxbase" statements have no else branch, so op
    stays unassigned for element types that are not listed. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word operands need the word instruction }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg)
              else
                begin
                  { it is the right operand that is loaded from memory here }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                end;
              emit_reg_reg(op,S_NO,left.location.register,hreg);
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  608. {$endif SUPPORT_MMX}
  609. {*****************************************************************************
  610. AddFloat
  611. *****************************************************************************}
  { Generates code for floating point add, subtract, multiply and divide
    using scalar SSE instructions. The result is placed in an MM
    register. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;

    { Applies op with src as source operand to location.register.
      A source held in an fpu register is written to memory first: that
      probably gives shorter code because there is no direct fpu->mm
      register copy instruction. }
    procedure apply_operand(var src : tlocation);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,src);
        cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,src,location.register,mms_movescalar);
      end;

    begin
      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used when both operands are on the fpu
            stack: both are "R_ST", so nothing would change. Exchange them
            manually in that case. }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      { right can only serve as destination if the operation is commutative }
      if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=right.location.register;
          apply_operand(left.location);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          apply_operand(right.location);
        end;
    end;
  { Generates code for a floating point comparison using COMISS/COMISD.
    The result is delivered in the cpu flags. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      op : tasmop;

    { Emits "op src,dst". A source held in an fpu register is written to
      memory first: that probably gives shorter code because there is no
      direct fpu->mm register copy instruction. errcode is the
      internalerror number raised for an unsupported source location. }
    procedure compare_operand(var src : tlocation;dst : tregister;errcode : longint);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,src);
        case src.loc of
          LOC_MMREGISTER,
          LOC_CMMREGISTER :
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,src.register,dst));
          LOC_REFERENCE,
          LOC_CREFERENCE :
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,src.reference,dst));
            end;
          else
            internalerror(errcode);
        end;
      end;

    begin
      { pick the instruction matching the operand precision }
      if is_single(left.resultdef) then
        op:=A_COMISS
      else if is_double(left.resultdef) then
        op:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left is loaded into an mm register and compared
            with right }
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          compare_operand(right.location,left.location.register,200402223);
        end
      else
        begin
          { right already is in an mm register: compare the other way
            round and remember that by flipping nf_swapped }
          compare_operand(left.location,right.location.register,200402221);
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for add, subtract, multiply and divide on vector
    operands that fit into an MM register; anything else raises
    internalerror 200610072.

    Change compared to the previous version: the element size passed to
    a_opmm_loc_reg is now derived from the element type of the vector in
    both branches. The commutative branch used to cast left.resultdef
    itself to tfloatdef, whereas the other branch already went through
    tarraydef(...).elementdef. }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of a single float element of the vector }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for floating point add, subtract, multiply and divide.
    Delegates to second_addfloatsse when SSE is used for the result type,
    otherwise x87 instructions are emitted and the result is left in ST. }
  procedure tx86addnode.second_addfloat;
    var
      op       : TAsmOp;
      reversed : boolean;
    begin
      if use_sse(resultdef) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      { reject unsupported node types before any operand is loaded }
      if not(nodetype in [addn,muln,subn,slashn]) then
        internalerror(2003042214);

      check_left_and_right_fpureg(true);

      { if the operands ended up swapped, the reverse form of the
        non-commutative operations has to be used }
      reversed:=nf_swapped in flags;
      case nodetype of
        addn :
          op:=A_FADDP;
        muln :
          op:=A_FMULP;
        subn :
          begin
            if reversed then
              op:=A_FSUBRP
            else
              op:=A_FSUBP;
          end;
        slashn :
          begin
            if reversed then
              op:=A_FDIVRP
            else
              op:=A_FDIVP;
          end;
      end;

      emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison. Delegates to
    second_cmpfloatsse when SSE is used for either operand type, otherwise
    the x87 unit is used. The result is delivered in the cpu flags.

    Change compared to the previous version: the node type -> flag
    mapping was spelled out four times in this routine and was identical
    to what getresflags(true) returns (equal/not equal plus the
    above/below conditions, mirrored when nf_swapped is set). The shared
    helper is called instead, which also removes the local result
    variable that could be left unassigned. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_sse(left.resultdef) or use_sse(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags into the cpu flags via AX }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      { in both cases the result has to be tested with the unsigned
        (above/below) conditions }
      location.resflags:=getresflags(true);
    end;
  888. {*****************************************************************************
  889. Add64bit
  890. *****************************************************************************}
  { 64 bit arithmetic: handled by second_addordinal when cpu64bitalu is
    defined; otherwise internalerror 200402042 is raised. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: handled by second_cmpordinal when cpu64bitalu is
    defined; otherwise internalerror 200402043 is raised. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  909. {*****************************************************************************
  910. AddOrdinal
  911. *****************************************************************************}
  { Generates code for comparing two ordinal values with CMP.
    The result is delivered in the cpu flags. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize      : tcgsize;
      use_unsigned : boolean;
    begin
      { the comparison is unsigned as soon as one operand is unsigned }
      use_unsigned:=not(is_signed(left.resultdef) and
                        is_signed(right.resultdef));
      cmpsize:=def_cgsize(left.resultdef);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,use_unsigned,false,false);

      { only the flags are of interest afterwards }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(use_unsigned);
    end;
  928. begin
  929. caddnode:=tx86addnode;
  930. end.