nx86add.pas 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Add/compare node code generator shared by the i386 and x86-64 back ends. }
  tx86addnode = class(tcgaddnode)
  protected
    { maps the comparison in nodetype (taking nf_swapped into account)
      to the CPU flag condition that holds its result }
    function getresflags(unsigned : boolean) : tresflags;
    { brings left.location into an integer register of size opsize,
      swapping the operand locations instead when that is allowed }
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { prepares both operands for an x87 stack operation and updates
      nf_swapped to reflect their order on the stack }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);
    { emits "op right,left" for any supported location kind of right }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer operation plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { SSE variants used by second_cmpfloat/second_addfloat }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;
  public
    procedure second_addfloat;override;
    // procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  47. implementation
  48. uses
  49. globtype,globals,
  50. verbose,cutils,
  51. cpuinfo,
  52. aasmbase,aasmtai,aasmdata,aasmcpu,
  53. symconst,symdef,
  54. cgobj,cgx86,cga,cgutils,
  55. paramgr,tgobj,ncgutil,
  56. ncon,nset,
  57. defutil;
  58. {*****************************************************************************
  59. Helpers
  60. *****************************************************************************}
  { Emits the integer instruction for a binary node whose left operand is
    already in a register, followed by the optional overflow check.

      op         - x86 instruction to emit
      opsize     - operand size used for every emitted instruction
      unsigned   - selects which flag the overflow check tests
                   (F_AE for unsigned, F_NO for signed)
      extra_not  - an additional NOT is applied to one operand before the
                   operation (which one depends on the path taken below)
      mboverflow - request an FPC_OVERFLOW check; it is only emitted when
                   overflow checking is switched on in the local switches }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shift : longint;
      ok_label : tasmlabel;
      scratch : Tregister;
      reversed_sub,
      check_ovf,
      const_rhs : boolean;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      { a subtraction whose operands were swapped earlier needs the
        operands the other way round }
      reversed_sub:=(nf_swapped in flags) and (nodetype=subn);
      check_ovf:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc<>LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute into a scratch register, then move the result to left }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
            end
          else
            begin
              { shorter encodings when right is a suitable constant }
              const_rhs:=right.location.loc=LOC_CONSTANT;
              if const_rhs and
                 (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.value=0) then
                { (in)equality test against zero }
                emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register)
              else if const_rhs and
                 (not check_ovf) and
                 (op=A_ADD) and
                 (right.location.value=1) then
                emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
              else if const_rhs and
                 (not check_ovf) and
                 (op=A_SUB) and
                 (right.location.value=1) then
                emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
              else if const_rhs and
                 (not check_ovf) and
                 (op=A_IMUL) and
                 ispowerof2(int64(right.location.value),shift) then
                { multiplication by a power of two becomes a shift }
                emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shift,left.location.register)
              else if extra_not then
                begin
                  scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
                  emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
                  emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
                end
              else
                emit_op_right_left(op,opsize);
            end;
        end
      else if reversed_sub then
        begin
          { both operands in registers: the result ends up in the register
            of right, so swap the locations (and the swapped flag) back }
          if extra_not then
            emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
          location_swap(left.location,right.location);
          toggleflag(nf_swapped);
        end
      else
        begin
          if extra_not then
            emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          { for these operations the locations are exchanged before emitting }
          case op of
            A_ADD,A_OR,A_AND,A_XOR,A_IMUL :
              location_swap(left.location,right.location);
          end;
          emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
        end;

      { the overflow check has to be produced here, because the signedness
        of the operation is only known through the unsigned parameter }
      if mboverflow and check_ovf then
        begin
          current_asmdata.getjumplabel(ok_label);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,ok_label)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,ok_label);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,ok_label);
        end;
    end;
  { Makes sure left.location is a LOC_REGISTER and that both operands have
    the size opsize (signedness is ignored for the size comparison).

      opsize - required operand size
      noswap - when true the operand locations may not be exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    var
      wanted : TCGSize;
      is_compare : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison doesn't change the value of the register, so
                a constant register may be reused in that case }
              is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,is_compare);
            end
          else
            begin
              { right already is a register: just exchange the locations }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { widen/narrow non-constant operands whose size does not match }
      wanted:=tcgsize2unsigned[opsize];
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>wanted) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>wanted) then
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
    end;
  { Prepares the operands of an x87 operation.

    When force_fpureg is set, every operand that is not yet a
    LOC_FPUREGISTER is loaded (right first, then left). nf_swapped is
    toggled whenever left was already on the FPU stack and right ends up
    being loaded after it, or when both were on the stack from the start. }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    var
      left_on_stack,
      right_on_stack : boolean;
    begin
      left_on_stack:=left.location.loc=LOC_FPUREGISTER;
      right_on_stack:=right.location.loc=LOC_FPUREGISTER;

      if left_on_stack and right_on_stack then
        { fpu operands are always in the wrong order on the stack }
        toggleflag(nf_swapped)
      else if force_fpureg then
        begin
          if right_on_stack then
            { only left is missing; it becomes the nominator in st0 }
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
          else
            begin
              location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
              if left_on_stack then
                { left was on the stack => swap }
                toggleflag(nf_swapped)
              else
                location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
            end;
        end;
    end;
  { Emits "op right,left" where left.location must already be a register.
    right may be a register, a memory reference, a constant or a subset
    location; any other location kind raises internal error 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      { subset locations are loaded into a full register first }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);

      case right.location.loc of
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly,
              anything larger goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag condition that represents the comparison stored in
    nodetype. When nf_swapped is set the relational operators are mirrored
    (< becomes >, <= becomes >=, ...). For unsigned compares the
    above/below conditions are used, for signed ones greater/less.
    The result is left unassigned for node types that are not comparisons. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { first fold the operand swap into the comparison itself }
      cmp:=nodetype;
      if nf_swapped in flags then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;

      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            getresflags:=F_B
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            getresflags:=F_BE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            getresflags:=F_A
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            getresflags:=F_AE
          else
            getresflags:=F_GE;
      end;
    end;
  303. {*****************************************************************************
  304. AddSmallSet
  305. *****************************************************************************}
  306. (*
  307. procedure tx86addnode.second_addsmallset;
  308. var
  309. setbase : aint;
  310. opsize : TCGSize;
  311. op : TAsmOp;
  312. extra_not,
  313. noswap : boolean;
  314. all_member_optimization:boolean;
  315. begin
  316. pass_left_right;
  317. noswap:=false;
  318. extra_not:=false;
  319. all_member_optimization:=false;
  320. opsize:=int_cgsize(resultdef.size);
  321. if (left.resultdef.typ=setdef) then
  322. setbase:=tsetdef(left.resultdef).setbase
  323. else
  324. setbase:=tsetdef(right.resultdef).setbase;
  325. case nodetype of
  326. addn :
  327. begin
  328. { adding elements is not commutative }
  329. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  330. swapleftright;
  331. { are we adding set elements ? }
  332. if right.nodetype=setelementn then
  333. begin
  334. { no range support for smallsets! }
  335. if assigned(tsetelementnode(right).right) then
  336. internalerror(43244);
  337. { btsb isn't supported }
  338. if opsize=OS_8 then
  339. opsize:=OS_32;
  340. { bts requires both elements to be registers }
  341. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
  342. location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
  343. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
  344. op:=A_BTS;
  345. noswap:=true;
  346. end
  347. else
  348. op:=A_OR;
  349. end;
  350. symdifn :
  351. op:=A_XOR;
  352. muln :
  353. op:=A_AND;
  354. subn :
  355. begin
  356. op:=A_AND;
  357. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  358. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  359. all_member_optimization:=true;
  360. if (not(nf_swapped in flags)) and
  361. (right.location.loc=LOC_CONSTANT) then
  362. right.location.value := not(right.location.value)
  363. else if (nf_swapped in flags) and
  364. (left.location.loc=LOC_CONSTANT) then
  365. left.location.value := not(left.location.value)
  366. else
  367. extra_not:=true;
  368. end;
  369. xorn :
  370. op:=A_XOR;
  371. orn :
  372. op:=A_OR;
  373. andn :
  374. op:=A_AND;
  375. else
  376. internalerror(2003042215);
  377. end;
  378. if all_member_optimization then
  379. begin
  380. {A set expression [0..31]-x can be implemented with a simple NOT.}
  381. if nf_swapped in flags then
  382. begin
  383. { newly swapped also set swapped flag }
  384. location_swap(left.location,right.location);
  385. toggleflag(nf_swapped);
  386. end;
  387. location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
  388. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  389. location:=right.location;
  390. end
  391. else
  392. begin
  393. { left must be a register }
  394. left_must_be_reg(opsize,noswap);
  395. emit_generic_code(op,opsize,true,extra_not,false);
  396. location_freetemp(current_asmdata.CurrAsmList,right.location);
  397. { left is always a register and contains the result }
  398. location:=left.location;
  399. end;
  400. { fix the changed opsize we did above because of the missing btsb }
  401. if opsize<>int_cgsize(resultdef.size) then
  402. location_force_reg(current_asmdata.CurrAsmList,location,int_cgsize(resultdef.size),false);
  403. end;
  404. *)
  { Generates the comparison of two small sets; the result is delivered
    in the CPU flags. Supports =, <>, <= and >=; every other node type
    raises internal error 2003042215. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
    begin
      pass_left_right;
      opsize:=int_cgsize(left.resultdef.size);

      if not(nodetype in [equaln,unequaln,lten,gten]) then
        internalerror(2003042215);

      if nodetype in [lten,gten] then
        begin
          { subset test: AND both sets and compare the result for equality;
            the operands are exchanged first for an unswapped <= and for a
            swapped >= }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
          emit_op_right_left(A_AND,opsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end;

      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  438. {*****************************************************************************
  439. AddMMX
  440. *****************************************************************************}
  441. {$ifdef SUPPORT_MMX}
  { Generates code for an MMX add/sub/mul/xor/or/and node. The result is
    always delivered in an MMX register.

    Fixes compared to the previous version:
      - saturated subtraction of signed 16 bit elements used the byte
        instruction PSUBSB; it now uses PSUBSW
      - the sanity check in the swapped subtraction path tested
        left.location although it is right.location that gets loaded
        from memory (left is an MMX register at that point, so the old
        check could never pass)
      - the never-set cmpop local and the dead code guarded by it are gone

    NOTE(review): op stays unassigned for mmxbase values that the inner
    case statements do not list (e.g. 32 bit elements with saturation). }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));

      { select the instruction from node type, element type and the
        saturation switch }
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word elements need the word form of the instruction }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is about to be used as a memory operand }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  610. {$endif SUPPORT_MMX}
  611. {*****************************************************************************
  612. AddFloat
  613. *****************************************************************************}
  { Generates a scalar SSE floating point add/sub/mul/div. The result is
    delivered in an MM register; node types other than addn, subn, muln
    and slashn raise internal error 200312231. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
    begin
      pass_left_right;
      check_left_and_right_fpureg(false);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if (right.location.loc<>LOC_MMREGISTER) or not(op in [OP_ADD,OP_MUL]) then
        begin
          { general case: left becomes the destination register }
          if (nf_swapped in flags) then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { an operand living in an fpu register is written to memory
            instead of being forced into an mm register: there is no
            direct fpu->mm register copy instruction, so going through
            memory probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end
      else
        begin
          { right can only take the role of the destination operand
            because the operation is commutative }
          location.register:=right.location.register;
          { same reasoning as above: fpu register operands go via memory }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end;
    end;
  { Generates a scalar SSE floating point comparison with COMISS/COMISD;
    the result is delivered in the CPU flags. Operand types other than
    single and double raise internal error 200402222. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpop:=A_COMISS
      else if is_double(left.resultdef) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if (right.location.loc<>LOC_MMREGISTER) then
        begin
          { left is made the register operand, right the other one }
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          { an operand living in an fpu register is written to memory
            instead of being forced into an mm register: there is no
            direct fpu->mm register copy instruction, so going through
            memory probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          case right.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
              end;
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right already is an mm register: compare against it directly
            and record that the operands were exchanged }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          case left.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
              end;
            else
              internalerror(200402221);
          end;
          { invert the swapped state }
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates an element-wise add/sub/mul/div on vectors that fit into an
    MM register; the result is delivered in an MM register.

    Raises internal error 200610071 for unsupported node types and
    200610072 for vectors that do not fit into an MM register.

    Fix: the commutative branch used to cast left.resultdef itself to
    tfloatdef although, as the other branch shows, it is an array def
    whose element def carries the float type. Both branches now take the
    operation size from the element def. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of one vector element, taken from the array's element type }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates a floating point add/sub/mul/div. Delegates to the SSE
    implementation when the result type uses the vector fpu, otherwise
    emits the popping x87 instruction and leaves the result in ST.
    Unsupported node types raise internal error 2003042214. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn :
          fpuop:=A_FADDP;
        subn :
          fpuop:=A_FSUBP;
        muln :
          fpuop:=A_FMULP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { if the operands ended up swapped, the non-commutative operations
        need their reverse form }
      if nf_swapped in flags then
        case nodetype of
          slashn :
            fpuop:=A_FDIVRP;
          subn :
            fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      { the popping form removed one operand from the fpu stack }
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates a floating point comparison; the result is delivered in the
    CPU flags. Delegates to the SSE implementation when either operand
    type uses the vector fpu. Otherwise both operands are compared on the
    x87 stack and popped. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags: status word -> AX -> cpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      { on both paths the result is read with the same conditions as an
        unsigned integer compare (E/NE/B/BE/A/AE, mirrored when the
        operands are swapped) }
      location.resflags:=getresflags(true);
    end;
  890. {*****************************************************************************
  891. Add64bit
  892. *****************************************************************************}
  { 64 bit arithmetic: on targets with a 64 bit ALU this is an ordinary
    ordinal operation; other targets raise internal error 200402042 here. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: on targets with a 64 bit ALU this is an ordinary
    ordinal comparison; other targets raise internal error 200402043 here. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  911. {*****************************************************************************
  912. AddOrdinal
  913. *****************************************************************************}
  { Generates an ordinal comparison with CMP; the result is delivered in
    the CPU flags. The compare is treated as unsigned as soon as one of
    the operand types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));
      cmpsize:=def_cgsize(left.resultdef);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are no longer needed once the flags are set }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  begin
    { unit initialization: assign tx86addnode to the caddnode class reference }
    caddnode:=tx86addnode;
  end.