nx86add.pas 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061
  {
      Copyright (c) 2000-2002 by Florian Klaempfl
      Common code generation for add nodes on the i386 and x86
      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation; either version 2 of the License, or
      (at your option) any later version.
      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.
      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   ****************************************************************************
  }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Code generator node for binary "add"-family operations shared by the
    x86 targets. It derives from the generic tcgaddnode; second_mul is
    declared abstract here and has to be supplied by a descendant. }
  tx86addnode = class(tcgaddnode)
  protected
    { maps the node's comparison type and swapped state to result flags }
    function  getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location is a LOC_REGISTER }
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { brings both operands into FPU registers }
    procedure left_and_right_must_be_fpureg;
    { emits "op right,left" for the current location kind of right }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the operation itself plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { SSE based float comparison and arithmetic }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;
    { called by second_addordinal for multiplications with an unsigned operand }
    procedure second_mul;virtual;abstract;
  public
    procedure second_addfloat;override;
    procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_addordinal;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
  {$ifdef SUPPORT_MMX}
    procedure second_opmmxset;override;
    procedure second_opmmx;override;
  {$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    { Emits the instruction(s) for an integer or small-set operation.

        op         - x86 opcode to emit
        opsize     - operand size of the operation
        unsigned   - selects the flag tested by the overflow check
                     (F_AE when true, F_NO otherwise)
        extra_not  - one operand is inverted with NOT before op is applied:
                     right normally, left for a swapped subtraction
        mboverflow - an overflow check is appended when overflow checking
                     is enabled in the local switches

      left.location is expected to be a LOC_REGISTER on entry. }
    var
      shift      : longint;
      skipovf    : tasmlabel;
      tmp        : Tregister;
      okflag     : tresflags;
      swappedsub,
      rightconst,
      ovfchecks  : boolean;
    begin
      { none of these conditions changes before the point where it is used }
      swappedsub:=(nodetype=subn) and (nf_swapped in flags);
      rightconst:=right.location.loc=LOC_CONSTANT;
      ovfchecks:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swappedsub then
            begin
              { swapped subtraction: the result is computed into right's
                register }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { exchange the locations so that left holds the result again
                and record the exchange in the swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before the
                instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if swappedsub then
        begin
          { right is not a register: load it into a scratch register,
            compute there and copy the result back into left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          tmp:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,tmp);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,tmp);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,tmp,left.location.register);
        end
      { shortcuts for constant right operands }
      else if (op=A_CMP) and rightconst and (right.location.value=0) and
              (nodetype in [equaln,unequaln]) then
        { (in)equality test against 0 -> test reg,reg }
        emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register)
      else if (op=A_ADD) and rightconst and (right.location.value=1) and
              not(ovfchecks) then
        emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_SUB) and rightconst and (right.location.value=1) and
              not(ovfchecks) then
        emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_IMUL) and rightconst and not(ovfchecks) and
              ispowerof2(int64(right.location.value),shift) then
        { multiplication by a power of two -> shift left }
        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shift,left.location.register)
      else if extra_not then
        begin
          { left := left and not(right), using a scratch copy of right }
          tmp:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,tmp);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],tmp);
          emit_reg_reg(A_AND,TCGSize2Opsize[opsize],tmp,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check has to be generated here, because the signedness
        of the operation is only known through the "unsigned" parameter. }
      if mboverflow and ovfchecks then
        begin
          current_asmdata.getjumplabel(skipovf);
          if unsigned then
            okflag:=F_AE
          else
            okflag:=F_NO;
          { jump over the FPC_OVERFLOW call when the flag condition holds }
          cg.a_jmp_flags(current_asmdata.CurrAsmList,okflag,skipovf);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
          cg.a_label(current_asmdata.CurrAsmList,skipovf);
        end;
    end;
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { Makes sure left.location is a LOC_REGISTER.

        opsize - size used when left has to be loaded into a register
        noswap - when true, the operand locations are never exchanged }
    begin
      { nothing to do if left already is a register }
      if left.location.loc=LOC_REGISTER then
        exit;
      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          { right is a register: exchange the locations instead of loading }
          location_swap(left.location,right.location);
          toggleflag(nf_swapped);
        end
      else
        { for comparisons the last argument is true: they do not change the
          register value, so a constant register may be reused }
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,
          nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
    end;
  procedure tx86addnode.left_and_right_must_be_fpureg;
    { Brings both operands into FPU registers. Right is loaded first if
      necessary, then left. If left already was an FPU register, nothing is
      loaded for it and nf_swapped is toggled instead. }
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
      if left.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
      else
        { left was already on the fpu stack, so the operands are in the
          opposite order there => note that in the swapped flag }
        toggleflag(nf_swapped);
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    { Emits "op right,left". left.location must be a register; right may be
      a register, a reference or a constant. Any other location kind of
      right raises internalerror 200203232. }
  {$ifdef x86_64}
    var
      constreg : tregister;
  {$endif x86_64}
    begin
      { subset registers/references are loaded into a plain register first }
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);
      case right.location.loc of
        LOC_CONSTANT :
          begin
  {$ifdef x86_64}
            { x86_64 only supports signed 32 bit constants directly; anything
              outside that range goes through a scratch register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and
                   (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
  {$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    { Returns the result flags matching the comparison stored in nodetype.

        unsigned - true selects the F_B/F_BE/F_A/F_AE family, false the
                   F_L/F_LE/F_G/F_GE family

      When nf_swapped is set, the comparison is mirrored (< becomes >,
      <= becomes >=). For node types other than the six comparison
      operators no result is assigned. }
    var
      cmp : tnodetype;
    begin
      cmp:=nodetype;
      { swapped operands: evaluate the mirrored comparison instead }
      if nf_swapped in flags then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;
      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          begin
            if unsigned then
              getresflags:=F_B
            else
              getresflags:=F_L;
          end;
        lten :
          begin
            if unsigned then
              getresflags:=F_BE
            else
              getresflags:=F_LE;
          end;
        gtn :
          begin
            if unsigned then
              getresflags:=F_A
            else
              getresflags:=F_G;
          end;
        gten :
          begin
            if unsigned then
              getresflags:=F_AE
            else
              getresflags:=F_GE;
          end;
      end;
    end;
  294. {*****************************************************************************
  295. AddSmallSet
  296. *****************************************************************************}
  procedure tx86addnode.second_addsmallset;
    { Generates code for small-set operations that produce a set (union,
      difference, intersection, symmetric difference, adding an element).
      The result is left in the register of left.location. }
    var
      ressize,
      opsize         : TCGSize;
      op             : TAsmOp;
      invert_operand,
      keep_order     : boolean;
    begin
      pass_left_right;
      keep_order:=false;
      invert_operand:=false;
      ressize:=int_cgsize(resultdef.size);
      opsize:=ressize;
      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              op:=A_OR
            else
              begin
                { adding a single element; ranges are not supported for
                  small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, so work on 32 bit instead }
                if opsize=OS_8 then
                  opsize:=OS_32;
                { bts requires both operands to be registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
                op:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and not(b): a constant subtrahend is
              inverted right here, anything else at run time }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,keep_order);
      emit_generic_code(op,opsize,true,invert_operand,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { left is always a register and contains the result }
      location:=left.location;
      { undo the widening that was done above because of the missing btsb }
      if opsize<>ressize then
        location_force_reg(current_asmdata.CurrAsmList,location,ressize,false);
    end;
  procedure tx86addnode.second_cmpsmallset;
    { Generates code for comparing two small sets (=, <>, <=, >=).
      The result is delivered in the CPU flags (LOC_FLAGS).

      Subset tests are reduced to an equality test: the operands are
      ordered so that "left and right" can be compared with right, and the
      node type is changed to equaln so that getresflags yields F_E. }
    var
      opsize : TCGSize;
      op     : TAsmOp;
    begin
      pass_left_right;
      { The operand size has to come from the compared sets. resultdef is
        the type of the comparison result, not of the operands, so it must
        not be used here (second_cmpordinal also takes the size from
        left.resultdef). }
      opsize:=int_cgsize(left.resultdef.size);
      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,gten:
          begin
            if (not(nf_swapped in flags) and (nodetype = lten)) or
               ((nf_swapped in flags) and (nodetype = gten)) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(op,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  400. {*****************************************************************************
  401. AddMMX
  402. *****************************************************************************}
  403. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    { Generates code for MMX vector operations (add, sub, mul, xor, or, and).
      The MMX opcode is selected from the node type, the MMX base type of
      the left operand and the saturation switch; the result is returned
      in an MMX register.

      NOTE(review): for base types without a matching case label (e.g.
      32 bit elements with saturation, or 8/32 bit elements for muln) no
      opcode is assigned to "op"; confirm that earlier passes reject
      those combinations. }
    var
      op        : TAsmOp;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word form of the
                      saturating subtract (this used to be A_PSUBSB, which
                      operates on bytes) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a new register and
                compute the result there }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg)
              else
                begin
                  { it is right whose reference is read below, so right
                    has to be checked (left is an MMX register here, the
                    former check on left always failed) }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                end;
              emit_reg_reg(op,S_NO,left.location.register,hreg);
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      { this routine generates no comparisons, so only right is released;
        left's register carries the result in all but one path }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  569. {$endif SUPPORT_MMX}
  570. {*****************************************************************************
  571. addmmxset
  572. *****************************************************************************}
  573. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmxset;
    { Set operations on MMX registers. This routine selects an opcode but
      does not emit it: it only forces left into a register and releases
      the operand temps. For lten/gten an AND of the operands is emitted
      through emit_op_right_left. }
    var
      opsize : TCGSize;
      op     : TAsmOp;
      cmpop,
      noswap : boolean;
    begin
      pass_left_right;
      cmpop:=false;
      noswap:=false;
      opsize:=OS_32;
      case nodetype of
        addn :
          begin
            { adding single set elements (via bts) is not handled here;
              in that case no opcode is selected at all }
            if right.nodetype<>setelementn then
              op:=A_POR;
          end;
        subn :
          op:=A_PANDN;
        symdifn,
        xorn :
          op:=A_PXOR;
        muln,
        andn :
          op:=A_PAND;
        orn :
          op:=A_POR;
        equaln,
        unequaln :
          begin
            op:=A_PCMPEQD;
            cmpop:=true;
          end;
        lten,gten:
          begin
            if (not(nf_swapped in flags) and (nodetype = lten)) or
               ((nf_swapped in flags) and (nodetype = gten)) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
            emit_op_right_left(A_AND,opsize);
            op:=A_PCMPEQD;
            cmpop:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,noswap);
      { no instruction is emitted for op at this point }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  642. {$endif SUPPORT_MMX}
  643. {*****************************************************************************
  644. AddFloat
  645. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    { Floating point +, -, *, / using scalar operations on MM registers.
      The result is returned as LOC_MMREGISTER. }
    var
      mmop : topcg;
    begin
      pass_left_right;
      if nf_swapped in flags then
        swapleftright;

      case nodetype of
        addn :
          mmop:=OP_ADD;
        subn :
          mmop:=OP_SUB;
        muln :
          mmop:=OP_MUL;
        slashn :
          mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if not((right.location.loc=LOC_MMREGISTER) and (mmop in [OP_ADD,OP_MUL])) then
        begin
          { general case: left becomes the destination register }
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { An operand that lives in an fpu register is written to memory
            rather than forced into an mm register: this probably allows
            shorter code, because there is no direct fpu->mm register copy
            instruction. }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,right.location,location.register,mms_movescalar);
        end
      else
        begin
          { right already is an mm register and the operation is
            commutative, so right's register can serve as destination }
          location.register:=right.location.register;
          { see above for why an fpu register operand goes through memory }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,left.location,location.register,mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_cmpfloatsse;
    { Floating point comparison with COMISS/COMISD. The result is returned
      in the CPU flags; getresflags(true) selects the condition. Operand
      types other than single and double raise internalerror 200402222. }
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=A_COMISS
      else if is_double(left.resultdef) then
        cmpinstr:=A_COMISD
      else
        internalerror(200402222);
      pass_left_right;

      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right already is an mm register: compare against it with left
            as the source operand. An fpu register operand is written to
            memory rather than forced into an mm register, because there is
            no direct fpu->mm register copy instruction. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          case left.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              begin
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
              end;
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            else
              internalerror(200402221);
          end;
          { the operands were used in exchanged roles: flip nf_swapped }
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          { see above for why an fpu register operand goes through memory }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          case right.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              begin
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
              end;
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            else
              internalerror(200402223);
          end;
        end;
      location.resflags:=getresflags(true);
    end;
  procedure tx86addnode.second_opvector;
    { Element-wise +, -, *, / on vector operands that fit into an MM
      register. Other vectors are not supported yet (internalerror
      200610072); other node types raise internalerror 200610071. }
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is the size of one vector element. The
            operand type is the array type, so the float type has to be
            read from its element def in both branches below; the
            commutative branch used to cast the array def itself to
            tfloatdef. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  procedure tx86addnode.second_addfloat;
    { Floating point +, -, *, /. Types for which use_sse returns true are
      delegated to second_addfloatsse; everything else is computed on the
      x87 stack and returned in ST. }
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resultdef) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn :
          fpuop:=A_FADDP;
        subn :
          fpuop:=A_FSUBP;
        muln :
          fpuop:=A_FMULP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { with swapped operands the non-commutative operations need their
        reversed form }
      if nf_swapped in flags then
        case nodetype of
          slashn :
            fpuop:=A_FDIVRP;
          subn :
            fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  procedure tx86addnode.second_cmpfloat;
    { Floating point comparison. If use_sse returns true for either operand
      type, second_cmpfloatsse is used; otherwise the comparison is done on
      the x87 stack and the result is returned in the CPU flags.

      The flag selection is the one getresflags(true) implements: F_E/F_NE
      for (in)equality and F_B/F_BE/F_A/F_AE for the ordering comparisons,
      mirrored when nf_swapped is set. }
    begin
      if use_sse(left.resultdef) or use_sse(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

  {$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands, then load the fpu status word
            into the cpu flags through AX }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
  {$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so the other one is popped
            explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  915. {*****************************************************************************
  916. Add64bit
  917. *****************************************************************************}
  procedure tx86addnode.second_add64bit;
    { 64 bit arithmetic: with cpu64bit defined this is ordinary ordinal
      arithmetic; otherwise it has to be implemented separately and this
      version raises internalerror 200402042. }
    begin
  {$ifdef cpu64bit}
      second_addordinal;
  {$else cpu64bit}
      { must be implemented separately }
      internalerror(200402042);
  {$endif cpu64bit}
    end;
  procedure tx86addnode.second_cmp64bit;
    { 64 bit comparison: with cpu64bit defined this is an ordinary ordinal
      comparison; otherwise it has to be implemented separately and this
      version raises internalerror 200402043. }
    begin
  {$ifdef cpu64bit}
      second_cmpordinal;
  {$else cpu64bit}
      { must be implemented separately }
      internalerror(200402043);
  {$endif cpu64bit}
    end;
  936. {*****************************************************************************
  937. AddOrdinal
  938. *****************************************************************************}
  procedure tx86addnode.second_addordinal;
    { Ordinal arithmetic. A multiplication with at least one unsigned
      operand requires special handling and is passed to second_mul;
      everything else is handled by the inherited implementation. }
    begin
      if (nodetype<>muln) or
         (is_signed(left.resultdef) and is_signed(right.resultdef)) then
        inherited second_addordinal
      else
        second_mul;
    end;
  procedure tx86addnode.second_cmpordinal;
    { Ordinal comparison with CMP; the result is returned in the CPU flags.
      The comparison is treated as unsigned as soon as one of the operand
      types is not signed. }
    var
      cmpsize     : tcgsize;
      unsignedcmp : boolean;
    begin
      { size and signedness are taken from the operand types before the
        operands are processed }
      cmpsize:=def_cgsize(left.resultdef);
      unsignedcmp:=not(is_signed(left.resultdef) and is_signed(right.resultdef));

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,unsignedcmp,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsignedcmp);
    end;
  { unit initialization: assign tx86addnode to the caddnode class variable }
  begin
    caddnode:=tx86addnode;
  end.