nx86add.pas 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add node of the x86 code generators (see the unit header: common code
      for i386 and x86).  The protected section holds helpers used by the
      second_* code generation methods declared in the public section. }
    tx86addnode = class(tcgaddnode)
    protected
      { Translates the comparison stored in nodetype into a flag condition.
        "unsigned" selects the unsigned conditions; a set nf_swapped flag
        mirrors the ordering conditions. }
      function getresflags(unsigned : boolean) : tresflags;

      { Ensures left.location is a LOC_REGISTER.  Unless noswap is set the
        operand locations are exchanged when only right is in a register. }
      procedure left_must_be_reg(opdef : tdef; opsize : TCGSize; noswap : boolean);

      { Updates nf_swapped for operands living on the fpu stack and, when
        force_fpureg is set, loads operands that are not there yet. }
      procedure check_left_and_right_fpureg(force_fpureg : boolean);

      { Emits "op right,left"; left.location must already be a register. }
      procedure emit_op_right_left(op : TAsmOp; opsize : TCgSize);

      { Emits the instruction(s) for an integer/set operation whose left
        operand is in a register, plus the optional overflow check. }
      procedure emit_generic_code(op : TAsmOp; opsize : TCgSize;
        unsigned, extra_not, mboverflow : boolean);

      { SSE variants of the floating point compare and arithmetic code. }
      procedure second_cmpfloatsse;
      procedure second_addfloatsse;
    public
      { Overrides of the tcgaddnode code generation methods. }
      procedure second_addfloat; override;
{$ifndef i8086}
      procedure second_addsmallset; override;
{$endif not i8086}
      procedure second_add64bit; override;
      procedure second_cmpfloat; override;
      procedure second_cmpsmallset; override;
      procedure second_cmp64bit; override;
      procedure second_cmpordinal; override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx; override;
{$endif SUPPORT_MMX}
      procedure second_opvector; override;
    end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,hlcgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,ninl,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  { Emits the machine instruction(s) for a two operand integer or small set
    operation.

    op         : instruction to emit
    opsize     : operand size
    unsigned   : selects the carry based (true) or the overflow flag based
                 (false) overflow test
    extra_not  : complement the subtrahend before applying op
    mboverflow : the operation may overflow; a check is emitted if overflow
                 checking is enabled in the local switches

    On entry left.location.loc is expected to be LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount : longint;
      overflow_ok : tasmlabel;
      scratch : Tregister;
      asmsize : topsize;
      reversed_sub,
      right_is_const,
      overflow_checked : boolean;
    begin
      asmsize:=TCGSize2Opsize[opsize];
      reversed_sub:=(nodetype=subn) and (nf_swapped in flags);
      overflow_checked:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              { operands are exchanged: the result is built in the register
                of right, which then becomes the new left }
              if extra_not then
                emit_reg(A_NOT,asmsize,left.location.register);
              emit_reg_reg(op,asmsize,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,asmsize,right.location.register);
              { for these operations the locations are exchanged first, so
                the result ends up in the register that held right }
              case op of
                A_ADD,A_OR,A_AND,A_XOR,A_IMUL:
                  location_swap(left.location,right.location);
                else
                  ;
              end;
              emit_reg_reg(op,asmsize,right.location.register,left.location.register);
            end;
        end
      else if reversed_sub then
        begin
          { right is not in a register and the operands are exchanged:
            compute into a scratch register and copy the result to left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,hlcg.tcgsize2orddef(opsize),right.location,scratch);
          emit_reg_reg(op,asmsize,left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      else
        begin
          right_is_const:=right.location.loc=LOC_CONSTANT;

          { shorter encodings for some constant right operands }
          if (op=A_CMP) and
             (nodetype in [equaln,unequaln]) and
             right_is_const and
             (right.location.value=0) then
            { (in)equality with zero: test reg,reg }
            emit_reg_reg(A_TEST,asmsize,left.location.register,left.location.register)
          else if (op=A_ADD) and
                  right_is_const and
                  (right.location.value=1) and
                  not(overflow_checked) then
            emit_reg(A_INC,asmsize,left.location.register)
          else if (op=A_SUB) and
                  right_is_const and
                  (right.location.value=1) and
                  not(overflow_checked) then
            emit_reg(A_DEC,asmsize,left.location.register)
          else if (op=A_IMUL) and
                  right_is_const and
                  ispowerof2(int64(right.location.value),shiftcount) and
                  not(overflow_checked) then
            { multiplication by a power of two becomes a shift }
            emit_const_reg(A_SHL,asmsize,shiftcount,left.location.register)
          else if extra_not then
            begin
              { complement a copy of right and AND it into left }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,hlcg.tcgsize2orddef(opsize),right.location,scratch);
              emit_reg(A_NOT,asmsize,scratch);
              emit_reg_reg(A_AND,asmsize,scratch,left.location.register);
            end
          else
            emit_op_right_left(op,opsize);
        end;

      { The overflow test has to be emitted here because only this routine
        knows the signedness of the operation (parameter "unsigned"). }
      if mboverflow and overflow_checked then
        begin
          current_asmdata.getjumplabel(overflow_ok);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,overflow_ok)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,overflow_ok);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,overflow_ok);
        end;
    end;
  { Brings left.location into a LOC_REGISTER and makes both operands agree
    with opsize.

    opdef  : definition the operands are converted to when they are loaded
    opsize : operand size of the operation
    noswap : when true the operand locations are never exchanged }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if (right.location.loc=LOC_REGISTER) and not(noswap) then
            begin
              { right already is a register: exchange the locations and
                record that in the node flags }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { a comparison does not modify its operand, so a constant
                register may be reused for it }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
            end;
        end;

      { operands whose size differs from opsize (signedness aside) are
        loaded into a register of the requested definition }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Records the operand order of fpu operands in nf_swapped and, if
    force_fpureg is set, loads operands that are not yet a LOC_FPUREGISTER.

    force_fpureg : load operands that are not on the fpu stack }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    var
      left_on_stack,
      right_on_stack : boolean;
    begin
      { loading right below does not touch left.location, so both states can
        be sampled up front }
      left_on_stack:=left.location.loc=LOC_FPUREGISTER;
      right_on_stack:=right.location.loc=LOC_FPUREGISTER;

      if left_on_stack and right_on_stack then
        { fpu operands are always in the wrong order on the stack }
        toggleflag(nf_swapped)
      else if force_fpureg then
        begin
          if right_on_stack then
            { only left is missing; it is loaded last, i.e. into st0 }
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
          else
            begin
              location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
              if left_on_stack then
                { left was on the stack before right was loaded => swapped }
                toggleflag(nf_swapped)
              else
                location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
            end;
        end;
    end;
  { Appends the instruction "op right,left" to the current assembler list.
    left.location must be a register; right may be a register, a memory
    reference, a constant or a subset location (which is loaded first).

    op     : instruction to emit
    opsize : operand size }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      instr : taicpu;
      asmsize : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      { subset locations cannot be used as instruction operands }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      asmsize:=TCGSize2Opsize[opsize];
      instr:=nil;
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          instr:=taicpu.op_reg_reg(op,asmsize,right.location.register,left.location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            instr:=taicpu.op_ref_reg(op,asmsize,right.location.reference,left.location.register);
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only encodes signed 32 bit immediates; larger 64 bit
              constants go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                instr:=taicpu.op_reg_reg(op,asmsize,constreg,left.location.register);
              end
            else
{$endif x86_64}
              instr:=taicpu.op_const_reg(op,asmsize,right.location.value,left.location.register);
          end;
        else
          internalerror(200203232);
      end;
      current_asmdata.CurrAsmList.concat(instr);
    end;
  { Returns the flag condition that represents the comparison stored in
    nodetype after a CMP/TEST/COMIS* style instruction.

    unsigned : true selects the unsigned conditions (above/below), false the
               signed ones (greater/less)

    When nf_swapped is set the operands were compared in reverse order, so
    the ordering conditions are mirrored; equality is unaffected.

    Any node type other than the six comparison operators is a compiler
    bug and raises an internal error instead of leaving the function
    result undefined. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    begin
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        else
          if not(unsigned) then
            begin
              if nf_swapped in flags then
                case nodetype of
                  ltn : getresflags:=F_G;
                  lten : getresflags:=F_GE;
                  gtn : getresflags:=F_L;
                  gten : getresflags:=F_LE;
                  else
                    internalerror(2013120105);
                end
              else
                case nodetype of
                  ltn : getresflags:=F_L;
                  lten : getresflags:=F_LE;
                  gtn : getresflags:=F_G;
                  gten : getresflags:=F_GE;
                  else
                    internalerror(2013120106);
                end;
            end
          else
            begin
              if nf_swapped in flags then
                case nodetype of
                  ltn : getresflags:=F_A;
                  lten : getresflags:=F_AE;
                  gtn : getresflags:=F_B;
                  gten : getresflags:=F_BE;
                  else
                    internalerror(2013120107);
                end
              else
                case nodetype of
                  ltn : getresflags:=F_B;
                  lten : getresflags:=F_BE;
                  gtn : getresflags:=F_A;
                  gten : getresflags:=F_AE;
                  else
                    internalerror(2013120108);
                end;
            end;
      end;
    end;
  306. {*****************************************************************************
  307. AddSmallSet
  308. *****************************************************************************}
  {$ifndef i8086}
  { Generates code for the small set operators: union and element
    inclusion (addn), symmetric difference, intersection, difference (subn)
    and the bitwise xor/or/and nodes.  The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize,
      resultsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap,
      all_member_optimization : boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      resultsize:=int_cgsize(resultdef.size);
      opsize:=resultsize;

      { one operand may be a set element instead of a set; take the base
        from whichever side is the set }
      if (left.resultdef.typ=setdef) then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              op:=A_OR
            else
              begin
                { including a single element; small sets have no ranges }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { there is no byte sized bts, so widen to 32 bit }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b); with nf_swapped set the
              minuend is in right and the subtrahend in left }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                all_member_optimization:=
                  (right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                all_member_optimization:=
                  (left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not(all_member_optimization) then
        begin
          { left must be a register; it receives the result }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location:=left.location;
        end
      else
        begin
          { a set expression [0..31]-x can be implemented with a simple NOT }
          if nf_swapped in flags then
            begin
              { undo the swap so that right is the subtrahend again }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { convert back if the operation was widened because of the missing
        byte sized bts }
      if opsize<>resultsize then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,hlcg.tcgsize2orddef(resultsize),false);
    end;
  {$endif not i8086}
  { Generates code for comparing two small sets.  Equality tests compare
    the operands directly; subset tests (<=, >=) first intersect the two
    sets and then compare the intersection with the other operand.  The
    result is returned in the processor flags. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;

      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          { nothing to prepare, a plain CMP is emitted below }
          ;
        lten,
        gten :
          begin
            { swap for "<=" on unswapped operands and for ">=" on swapped
              operands, i.e. when "is >=" and "is swapped" agree }
            if (nodetype=gten)=(nf_swapped in flags) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  448. {*****************************************************************************
  449. AddMMX
  450. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for MMX vector operations (add, multiply, subtract and
    the bitwise xor/or/and).  The result is returned in an MMX register.

    Fixes compared with the previous version:
      - saturating subtraction of signed 16 bit elements used the byte
        instruction PSUBSB; it now uses PSUBSW
      - the sanity check in the swapped subtraction path tested
        left.location, which is always an MMX register at that point, so a
        right operand in memory always raised internalerror 200203247; it
        now tests right.location
      - the always-false local "cmpop" and the code depending on it were
        removed

    NOTE(review): the inner "case mmxbase" statements have no else branch,
    so "op" stays unassigned for element types they do not list. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));

      { select the instruction }
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized signed saturation, matching PADDSW above }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register?  then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a new register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand that is read from memory here }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { compute into right and make it the new left }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  {$endif SUPPORT_MMX}
  621. {*****************************************************************************
  622. AddFloat
  623. *****************************************************************************}
  { Generates SSE code for the scalar floating point operators +, -, * and /.
    With SSE3 (or better) enabled, sqr(a)+sqr(b) and sqr(a)-sqr(b) are
    recognised and computed with one packed multiply followed by a
    horizontal add/subtract.  The result is returned in an mm register. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
      horizop : tasmop;
      sqr_sum : boolean;
      inner : tnode;
    begin
      sqr_sum:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { replace both sqr() nodes by their arguments; the squaring is
            done by the packed multiply below }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then
            both are "R_ST" -> nothing would change -> manually switch }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack the low doubles of left and right into one register
                and square both lanes }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horizop:=A_HADDPD;
                subn:
                  horizop:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horizop,S_NO,location.register,location.register));
            end
          else
            begin
              { interleave the low singles of left and right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { copy the low 64 bits to the high 64 bits so that bits
                64..127 contain valid values as well }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horizop:=A_HADDPS;
                subn:
                  horizop:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horizop,S_NO,location.register,location.register));
            end;
        end
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          { right may only serve as destination if the operation is
            commutative }
          location.register:=right.location.register;
          { An operand on the fpu stack is written to memory rather than
            moved to an mm register: there is no direct fpu->mm register
            copy instruction, so going through memory probably gives
            shorter code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if (nf_swapped in flags) then
            swapleftright;
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { same reasoning as above: spill an fpu operand to memory }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates an SSE scalar floating point comparison (COMISS/COMISD) and
    returns the result in the processor flags.  If right already is an mm
    register it is used as the register operand and the comparison is
    recorded as swapped; otherwise left is loaded into an mm register. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      op : tasmop;

    { Emits "op src,dstreg".  An operand on the fpu stack is first written
      to memory: there is no direct fpu->mm register copy instruction, so
      going through memory probably gives shorter code.  errnr is the
      internal error raised for an unsupported source location. }
    procedure emit_compare(var src : tlocation; srcdef : tdef; dstreg : tregister; errnr : longint);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,src,srcdef);
        case src.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,src.reference,dstreg));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,src.register,dstreg));
          else
            internalerror(errnr);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        op:=A_COMISS
      else if is_double(left.resultdef) then
        op:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if (right.location.loc=LOC_MMREGISTER) then
        begin
          { compare with the operands exchanged and remember that, so that
            getresflags mirrors the condition }
          emit_compare(left.location,left.resultdef,right.location.register,200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          emit_compare(right.location,right.resultdef,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for an element-wise add, subtract, multiply or divide of
    two vector operands whose type fits into a single MM register. The result
    is left in an MM register (LOC_MMREGISTER).

    If the node is marked nf_swapped, left and right are exchanged first.
    For the commutative operations (add, mul) a right operand that already
    lives in an MM register is reused as destination; in every other case
    left is forced into an MM register and becomes the destination.

    Raises internalerror 200610071 for an unsupported node type and
    200610072 when the operand type does not fit into an MM register
    (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { The operand size handed to the code generator is the size of one
            vector element. left.resultdef is the vector (array) type itself,
            so the float type has to be taken from its element definition;
            casting the array definition directly to tfloatdef, as the
            "right in register" branch used to do, reads an unrelated field. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { right may only take the place of the destination operand if the
            operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add, subtract, multiply or divide.

    When the result type is handled by the vector FPU the work is delegated
    to second_addfloatsse. Otherwise both operands are brought onto the x87
    stack and combined with the popping form of the instruction
    (FADDP/FSUBP/FMULP/FDIVP), leaving the result in ST.

    Raises internalerror 2003042214 for any other node type. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      { vector fpu types are not handled here }
      if use_vectorfpu(resultdef) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn   : fpuop:=A_FADDP;
        muln   : fpuop:=A_FMULP;
        subn   : fpuop:=A_FSUBP;
        slashn : fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { when the tree nodes were swapped, the non-commutative operations
        need the reversed form of the instruction }
      if nf_swapped in flags then
        begin
          if nodetype=subn then
            fpuop:=A_FSUBRP
          else if nodetype=slashn then
            fpuop:=A_FDIVRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      { the popping instruction removed one entry from the fpu stack }
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison and reports the outcome
    through the CPU flags (LOC_FLAGS).

    When either operand type is handled by the vector FPU the work is
    delegated to second_cmpfloatsse. Otherwise both operands are brought onto
    the x87 stack and compared:
      - on non-x86_64 targets with a cpu type below Pentium2: FCOMPP, then
        the fpu status word is transferred to the CPU flags via AX and SAHF
        (on i8086 targets below the 286 the status word goes through a
        2 byte temporary in memory first);
      - otherwise: FCOMIP followed by FSTP ST0 to pop the second operand.
    In both cases two entries are removed from the fpu stack.

    The condition stored in location.resflags depends on the node type and
    on nf_swapped (lt/gt and lte/gte are exchanged when swapped).

    Raises internalerror 2024101301/2024101302 when the node type is not one
    of the six comparison node types; previously resflags was left
    uninitialized in that case. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
{$ifdef i8086}
      tmpref: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word to a temporary and load it into AX
                from there }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
              emit_ref(A_FNSTSW,S_NO,tmpref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_ref_reg(A_MOV,S_W,tmpref,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
            end
          else
{$endif i8086}
            begin
              { store the status word directly into AX }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Select the condition to test. Both code paths above use the same
        mapping, so it is determined once here. }
      if nf_swapped in flags then
        begin
          case nodetype of
            equaln   : resflags:=F_E;
            unequaln : resflags:=F_NE;
            ltn      : resflags:=F_A;
            lten     : resflags:=F_AE;
            gtn      : resflags:=F_B;
            gten     : resflags:=F_BE;
            else
              internalerror(2024101301);
          end;
        end
      else
        begin
          case nodetype of
            equaln   : resflags:=F_E;
            unequaln : resflags:=F_NE;
            ltn      : resflags:=F_B;
            lten     : resflags:=F_BE;
            gtn      : resflags:=F_A;
            gten     : resflags:=F_AE;
            else
              internalerror(2024101302);
          end;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  978. {*****************************************************************************
  979. Add64bit
  980. *****************************************************************************}
  { 64 bit add/sub/logical operation. With cpu64bitalu defined this is
    handled by second_addordinal; without it this implementation raises
    internalerror 200402042 because the operation must be implemented
    separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison. With cpu64bitalu defined this is handled by
    second_cmpordinal; without it this implementation raises internalerror
    200402043 because the comparison must be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  999. {*****************************************************************************
  1000. AddOrdinal
  1001. *****************************************************************************}
  { Generates code for a comparison of two ordinal operands: emits a CMP
    and reports the outcome through the CPU flags (LOC_FLAGS).

    The comparison is treated as unsigned as soon as one of the two operand
    types is not signed. Operand type and size are taken from the left
    operand. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      { signed only if both sides are signed }
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));

      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are no longer needed once the flags are set }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: assign tx86addnode to the caddnode class reference }
  begin
    caddnode:=tx86addnode;
  end.