nx86add.pas 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add node shared by the x86 family code generators.  It extends
    tcgaddnode with helpers that emit two-operand x86 instructions and
    overrides the second_* hooks for sets, 64 bit values, floating point,
    MMX and vector operands. }
  tx86addnode = class(tcgaddnode)
  protected
    { --- helpers for integer and small set code ------------------------ }

    { maps the comparison in nodetype (taking nf_swapped into account)
      to the processor flag that has to be tested }
    function  getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location is a LOC_REGISTER of the operation size }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { tracks/forces the position of both operands on the fpu stack }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);
    { emits "op right.location,left.location.register" }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the operation itself plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    { --- floating point back ends --------------------------------------- }
    procedure second_cmpfloatsse;
    procedure second_cmpfloatavx;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  52. implementation
  53. uses
  54. globtype,globals,
  55. verbose,cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmdata,aasmcpu,
  58. symconst,symdef,
  59. cgobj,hlcgobj,cgx86,cga,cgutils,
  60. paramgr,tgobj,ncgutil,
  61. ncon,nset,ninl,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  { Emits the instruction "op" that combines right.location with the
    register in left.location.

      op         - instruction to emit
      opsize     - operand size of the operation
      unsigned   - selects the flag tested by the overflow check
                   (F_AE for unsigned, F_NO otherwise)
      extra_not  - one operand has to be inverted with NOT first
      mboverflow - the operation may overflow; when overflow checking is
                   enabled a conditional call to FPC_OVERFLOW is appended

    left.location.loc is expected to be LOC_REGISTER on entry. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount     : longint;
      lbl_no_ovf     : tasmlabel;
      tmpreg         : Tregister;
      insize         : topsize;
      swapped_sub,
      right_is_const,
      ovf_checked    : boolean;
    begin
      insize:=TCGSize2Opsize[opsize];
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      right_is_const:=(right.location.loc=LOC_CONSTANT);
      ovf_checked:=(cs_check_overflow in current_settings.localswitches);

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { swapped subtraction: the result ends up in the register of
                the right operand }
              if extra_not then
                emit_reg(A_NOT,insize,left.location.register);
              emit_reg_reg(op,insize,left.location.register,right.location.register);
              { the locations are exchanged again, so the flag has to follow }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,insize,right.location.register);
              { for these instructions the locations are exchanged before
                the instruction is emitted }
              case op of
                A_ADD,A_OR,A_AND,A_XOR,A_IMUL:
                  location_swap(left.location,right.location);
              end;
              emit_reg_reg(op,insize,right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not a register and the operands are swapped: compute
            into a scratch register and copy the result back to left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,tmpreg);
          emit_reg_reg(op,insize,left.location.register,tmpreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,tmpreg,left.location.register);
        end
      { shortcuts for constant right operands }
      else if (op=A_CMP) and
         (nodetype in [equaln,unequaln]) and
         right_is_const and
         (right.location.value=0) then
        { (un)equality against zero => test reg,reg }
        emit_reg_reg(A_TEST,insize,left.location.register,left.location.register)
      else if (op=A_ADD) and
         right_is_const and
         (right.location.value=1) and
         not ovf_checked then
        emit_reg(A_INC,insize,left.location.register)
      else if (op=A_SUB) and
         right_is_const and
         (right.location.value=1) and
         not ovf_checked then
        emit_reg(A_DEC,insize,left.location.register)
      else if (op=A_IMUL) and
         right_is_const and
         ispowerof2(int64(right.location.value),shiftcount) and
         not ovf_checked then
        { multiplication by a power of two => shift left }
        emit_const_reg(A_SHL,insize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { load right into a scratch register, invert it and AND it
            into left }
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,tmpreg);
          emit_reg(A_NOT,insize,tmpreg);
          emit_reg_reg(A_AND,insize,tmpreg,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check is generated right here because only this
        routine knows (through "unsigned") which flag has to be tested. }
      if mboverflow and ovf_checked then
        begin
          current_asmdata.getjumplabel(lbl_no_ovf);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,lbl_no_ovf)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,lbl_no_ovf);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,lbl_no_ovf);
        end;
    end;
  { Ensures that left.location is a LOC_REGISTER and that both operands
    have the size of the operation.

      opdef  - type the operands are converted to when forced to a register
      opsize - operand size of the operation
      noswap - when true, left and right locations are never exchanged }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not modify its operand, so a constant
                register may be reused for it }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
            end
          else
            begin
              { right already lives in a register: exchanging the
                locations is cheaper than loading left }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { non-constant operands whose size differs from the operation size
        are (re)loaded with the size of the operation }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Keeps nf_swapped in sync with the order of the operands on the fpu
    stack and, when force_fpureg is set, loads operands that are not in a
    LOC_FPUREGISTER yet. }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    var
      left_on_fpu,
      right_on_fpu : boolean;
    begin
      left_on_fpu:=(left.location.loc=LOC_FPUREGISTER);
      right_on_fpu:=(right.location.loc=LOC_FPUREGISTER);

      if left_on_fpu and right_on_fpu then
        { fpu operands are always in the wrong order on the stack }
        toggleflag(nf_swapped)
      else if force_fpureg then
        begin
          if not right_on_fpu then
            location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if not left_on_fpu then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
          else
            { left was already on the stack and right has just been
              loaded => the operands are swapped }
            toggleflag(nf_swapped);
        end;
    end;
  { Emits "op <right.location>,<left.location.register>".  left has to be
    in a register; right may be a register, a memory reference or a
    constant (subset locations are loaded into a register first).  Any
    other location of right raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      insize : topsize;
      dest   : tregister;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      insize:=TCGSize2Opsize[opsize];
      { left must be a register }
      dest:=left.location.register;

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,insize,right.location.register,dest));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,insize,right.location.reference,dest));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { only signed 32 bit immediates can be encoded directly on
              x86_64; anything else goes through a scratch register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,insize,constreg,dest));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,insize,right.location.value,dest));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag that represents the outcome of the comparison stored
    in nodetype.  When nf_swapped is set the operands were compared in
    reverse order, so the ordering comparisons are mirrored.  "unsigned"
    selects the below/above flags instead of the less/greater ones.
    The result is only assigned for equaln, unequaln, ltn, lten, gtn and
    gten. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { mirror the comparison first if the operands are swapped }
      cmp:=nodetype;
      if nf_swapped in flags then
        begin
          case cmp of
            ltn  : cmp:=gtn;
            lten : cmp:=gten;
            gtn  : cmp:=ltn;
            gten : cmp:=lten;
          end;
        end;

      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          begin
            if unsigned then
              getresflags:=F_B
            else
              getresflags:=F_L;
          end;
        lten :
          begin
            if unsigned then
              getresflags:=F_BE
            else
              getresflags:=F_LE;
          end;
        gtn :
          begin
            if unsigned then
              getresflags:=F_A
            else
              getresflags:=F_G;
          end;
        gten :
          begin
            if unsigned then
              getresflags:=F_AE
            else
              getresflags:=F_GE;
          end;
      end;
    end;
  308. {*****************************************************************************
  309. AddSmallSet
  310. *****************************************************************************}
  311. {$ifndef i8086}
  { Generates code for operations on small sets (sets that are handled
    as an ordinal value): union, adding a single element, difference,
    intersection and symmetric difference.  The result is left in a
    register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef   : tdef;
      opsize,
      ressize : TCGSize;
      op      : TAsmOp;
      swapped,
      extra_not,
      noswap,
      all_member_optimization : boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      ressize:=int_cgsize(resultdef.size);
      opsize:=ressize;

      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              op:=A_OR
            else
              begin
                { a single element is added; ranges are not supported
                  for small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { there is no byte sized bts, so widen the operation }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts needs both operands in registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as "a and not(b)" }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { right holds the minuend, left the subtrahend }
                all_member_optimization:=(right.location.loc=LOC_CONSTANT) and
                                         (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                all_member_optimization:=(left.location.loc=LOC_CONSTANT) and
                                         (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not all_member_optimization then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { the locations are exchanged again, so the flag has to follow }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { undo the widening that was necessary because of the missing btsb }
      if opsize<>ressize then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(ressize),false);
    end;
  414. {$endif not i8086}
  { Generates the comparison of two small sets.  Equality tests are a
    plain CMP; the subset tests (<=, >=) first AND both sets and then
    compare the result for equality.  The outcome is returned in the
    processor flags (LOC_FLAGS). }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef  : tdef;
      opsize : TCGSize;
      op     : TAsmOp;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,
        gten :
          begin
            { swap for an unswapped "<=" or a swapped ">=" }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, a JE is needed afterwards, so the node
              is turned into an equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(op,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  450. {*****************************************************************************
  451. AddMMX
  452. *****************************************************************************}
  453. {$ifdef SUPPORT_MMX}
  { Generates code for add, sub, mul, xor, or and and on MMX operands.
    The instruction is selected from the node type, the MMX element type
    of the left operand and (for add/sub) the saturation switch; the
    result is returned in an MMX register.

    Fixes compared to the previous version:
      * saturating subtraction of signed 16 bit elements used the byte
        instruction PSUBSB instead of PSUBSW
      * the sanity check in the swapped subtraction path inspected
        left.location (always an MMX register at that point) instead of
        right.location, so it always raised internalerror 200203247 }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { no comparison is handled here, so cmpop stays false }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word instruction,
                      matching A_PADDSW in the addn case }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a new register and
                subtract left from it; the new register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand whose reference is used
                    below, so that is the location to validate }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  622. {$endif SUPPORT_MMX}
  623. {*****************************************************************************
  624. AddFloat
  625. *****************************************************************************}
  { Generates scalar SSE code for +, -, * and / on floating point
    operands; the result is returned in an MM register.

    With an fpu type of at least SSE3, sqr(a)+sqr(b) and sqr(a)-sqr(b) are
    special-cased: both arguments are put into one register, squared with
    a single packed multiplication and combined with a horizontal
    add/subtract. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
      sqr_sum : boolean;

    { replaces the sqr() inline node n by its argument and frees the
      inline node }
    procedure strip_sqr(var n : tnode);
      var
        arg : tnode;
      begin
        arg:=tinlinenode(n).left;
        tinlinenode(n).left:=nil;
        n.free;
        n:=arg;
      end;

    { emits "instr location.register,location.register" }
    procedure emit_on_result(instr : TAsmOp);
      begin
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(instr,S_NO,location.register,location.register));
      end;

    { An operand living in an fpu register is written to memory.  It is
      not forced into an mm register because going through memory
      probably allows shorter code: there is no direct fpu->mm register
      copy instruction. }
    procedure spill_fpu_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    begin
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          strip_sqr(left);
          strip_sqr(right);
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if (nf_swapped in flags) then
        begin
          { swapleftright is useless if both operands are on the fpu
            stack, since both are "R_ST" and nothing would change, so
            they are exchanged manually }
          if (left.location.loc = LOC_FPUREGISTER) and
             (right.location.loc = LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              { combine both values in one register, square them and
                add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              emit_on_result(A_MULPD);
              case nodetype of
                addn:
                  emit_on_result(A_HADDPD);
                subn:
                  emit_on_result(A_HSUBPD);
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both halves of the register now hold the operand pair }
              emit_on_result(A_MULPS);
              case nodetype of
                addn:
                  emit_on_result(A_HADDPS);
                subn:
                  emit_on_result(A_HSUBPS);
                else
                  internalerror(201108163);
              end;
            end;
        end
      { right can only be used as left operand if the operation is
        commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          spill_fpu_operand(left);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          spill_fpu_operand(left);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          spill_fpu_operand(right);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates a scalar floating point add/sub/mul/div whose result lives in an
    MM register (location is reset to LOC_MMREGISTER).  Apart from the disabled
    "sum of squares" path, all code is emitted through the three-operand
    cg.a_opmm_*_reg_reg helpers with mms_movescalar. }
  procedure tx86addnode.second_addfloatavx;
    var
      mmop : topcg;
      fold_squares : boolean;
      inner : tnode;
    begin
      fold_squares:=false;
{$ifdef dummy}
      { sqr(x) +/- sqr(y): strip both sqr() inline nodes, keep only their
        arguments and let the packed code further down do the squaring }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fold_squares:=true;

          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands sit on the fpu stack:
            both are "R_ST", so nothing would change -> exchange them manually }
          if (left.location.loc<>LOC_FPUREGISTER) or
             (right.location.loc<>LOC_FPUREGISTER) then
            swapleftright
          else
            emit_none(A_FXCH,S_NO);
        end;

      { map the node type onto the generic code generator operation }
      case nodetype of
        subn :
          mmop:=OP_SUB;
        slashn :
          mmop:=OP_DIV;
        addn :
          mmop:=OP_ADD;
        muln :
          mmop:=OP_MUL;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fold_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built in the register holding the left operand }
          location:=left.location;

          if not is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all elements, then combine them horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end;
          exit;
        end;

      { left*2 is emitted as left+left }
      if (nodetype=muln) and is_constrealnode(right) and
         is_number_float(trealconstnode(right).value_real) and
         (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
          exit;
        end;

      { 2*right is emitted as right+right }
      if (nodetype=muln) and is_constrealnode(left) and
         is_number_float(trealconstnode(left).value_real) and
         (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
          exit;
        end;

      if (right.location.loc=LOC_MMREGISTER) and
         ((mmop=OP_ADD) or (mmop=OP_MUL)) then
        begin
          { right may only take the place of the left operand because the
            operation is commutative }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand living in an fpu register is written to memory instead
            of being forced into an mm register: there is no direct
            fpu->mm register copy instruction, so this probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,mmop,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand living in an fpu register is written to memory instead
            of being forced into an mm register: there is no direct
            fpu->mm register copy instruction, so this probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,mmop,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  { Compares two scalar floating point operands with COMISS/COMISD.  The result
    is delivered in the cpu flags (LOC_FLAGS, taken from getresflags(true)). }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;
    begin
      { the instruction is selected by the type of the left operand }
      if is_single(left.resultdef) then
        cmpop:=A_COMISS
      else if is_double(left.resultdef) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { normal operand order: left goes into an mm register }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          { an fpu register operand is written to memory rather than forced
            into an mm register: there is no direct fpu->mm register copy
            instruction, so this probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end
      else
        begin
          { right already is in an mm register: compare with the operands
            exchanged and record that in nf_swapped below }

          { an fpu register operand is written to memory rather than forced
            into an mm register: there is no direct fpu->mm register copy
            instruction, so this probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);

          { toggle nf_swapped }
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { AVX counterpart of the scalar float compare: emits VCOMISS/VCOMISD and
    delivers the result in the cpu flags (LOC_FLAGS, taken from
    getresflags(true)). }
  procedure tx86addnode.second_cmpfloatavx;
    var
      cmpop : tasmop;
    begin
      { the instruction is selected by the type of the left operand }
      if is_single(left.resultdef) then
        cmpop:=A_VCOMISS
      else if is_double(left.resultdef) then
        cmpop:=A_VCOMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { normal operand order: left goes into an mm register }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          { an fpu register operand is written to memory rather than forced
            into an mm register: there is no direct fpu->mm register copy
            instruction, so this probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end
      else
        begin
          { right already is in an mm register: compare with the operands
            exchanged and record that in nf_swapped below }

          { an fpu register operand is written to memory rather than forced
            into an mm register: there is no direct fpu->mm register copy
            instruction, so this probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);

          { toggle nf_swapped }
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for add/sub/mul/div on vector operands that fit into an MM
    register.  The result is left in an MM register (LOC_MMREGISTER); vectors
    which do not fit into an MM register are not supported yet and raise an
    internal error. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operand def is an array (vector) def, so the float type has to
            be taken from its element def.  Previously the commutative branch
            typecasted the array def itself to tfloatdef, unlike the other
            branch, and so read floattype from an object of the wrong class. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Floating point add/sub/mul/div.  Types handled by the vector fpu are
    delegated to the SSE or AVX variant; everything else is done on the x87
    stack with the popping instruction forms, leaving the result in ST. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;

      case nodetype of
        subn :
          fpuop:=A_FSUBP;
        slashn :
          fpuop:=A_FDIVP;
        addn :
          fpuop:=A_FADDP;
        muln :
          fpuop:=A_FMULP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { operands were swapped: the non-commutative operations need their
        reverse form }
      if nf_swapped in flags then
        begin
          if nodetype=subn then
            fpuop:=A_FSUBRP
          else if nodetype=slashn then
            fpuop:=A_FDIVRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Floating point comparison.  Types handled by the vector fpu are delegated
    to the SSE or AVX variant.  Otherwise both operands are compared on the x87
    stack: with FCOMPP + FNSTSW/SAHF on cpus older than the Pentium II (never
    on x86_64), with FCOMIP + FSTP elsewhere.  The result is returned as
    LOC_FLAGS. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
{$ifdef i8086}
      tmpref: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          if UseAVX then
            second_cmpfloatavx
          else
            second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { below the 286 the status word is stored to a temp and loaded
                into AX from there }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
              emit_ref(A_FNSTSW,S_NO,tmpref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_ref_reg(A_MOV,S_W,tmpref,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Select the result flags.  The mapping is the same for both code paths
        above, so it is done once here; the ordering conditions are mirrored
        when the operands were swapped.  An unexpected node type now raises an
        internal error instead of leaving resflags uninitialised. }
      case nodetype of
        equaln :
          resflags:=F_E;
        unequaln :
          resflags:=F_NE;
        ltn :
          begin
            if nf_swapped in flags then
              resflags:=F_A
            else
              resflags:=F_B;
          end;
        lten :
          begin
            if nf_swapped in flags then
              resflags:=F_AE
            else
              resflags:=F_BE;
          end;
        gtn :
          begin
            if nf_swapped in flags then
              resflags:=F_B
            else
              resflags:=F_A;
          end;
        gten :
          begin
            if nf_swapped in flags then
              resflags:=F_BE
            else
              resflags:=F_AE;
          end;
        else
          { NOTE(review): newly introduced internal error id - confirm it is
            unique within the compiler }
          internalerror(2024052101);
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  1204. {*****************************************************************************
  1205. Add64bit
  1206. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit alu this is plain ordinal arithmetic;
    other targets have to provide their own implementation. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit alu this is a plain ordinal comparison;
    other targets have to provide their own implementation. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1225. {*****************************************************************************
  1226. AddOrdinal
  1227. *****************************************************************************}
  { Ordinal comparison: emits a CMP on the operands and returns the result as
    LOC_FLAGS. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      { the comparison is unsigned as soon as one of the operands is unsigned }
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));
      { the operation size is determined by the left operand }
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: register tx86addnode as the class stored in caddnode }
  begin
    caddnode:=tx86addnode;
  end.