nx86add.pas 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Code generator node for binary operators on x86 targets.  Derives from
      tcgaddnode and overrides its second_* code generation methods. }
    tx86addnode = class(tcgaddnode)
    protected
      { Maps the node's comparison type to the CPU flag condition to test,
        taking signedness and the nf_swapped flag into account. }
      function getresflags(unsigned : boolean) : tresflags;
      { Makes sure left.location is a register (swapping the operand
        locations when allowed) and that both operands match opsize. }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      { Forces both operands into LOC_FPUREGISTER and keeps nf_swapped in
        sync with the resulting operand order. }
      procedure force_left_and_right_fpureg;
      { Emits "op right,left" for a right operand that is a register,
        a memory reference or a constant; left must be a register. }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { Emits the instruction(s) for an integer/set operation plus the
        optional overflow check; left must already be a register. }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      { SSE/AVX variants of the floating point comparison and arithmetic
        code generators. }
      procedure second_cmpfloatsse;
      procedure second_cmpfloatavx;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      { Overrides of the code generation methods inherited from tcgaddnode. }
      procedure second_addfloat;override;
{$ifndef i8086}
      { Not overridden when compiling for i8086. }
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      { Only available when the compiler is built with MMX support. }
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  52. implementation
  53. uses
  54. globtype,globals,
  55. verbose,cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmdata,aasmcpu,
  58. symconst,symdef,
  59. cgobj,hlcgobj,cgx86,cga,cgutils,
  60. paramgr,tgobj,ncgutil,
  61. ncon,nset,ninl,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  { Emits the machine instruction(s) for a binary integer/set operation.

    On entry left.location must already be a LOC_REGISTER; on exit
    left.location.register holds the result (the operand locations may have
    been swapped on the way).

      op         - x86 opcode to emit (A_ADD, A_SUB, A_CMP, A_IMUL, ...)
      opsize     - operand size used for every emitted instruction
      unsigned   - selects the flag tested by the overflow check
                   (F_AE when true, F_NO otherwise)
      extra_not  - complement the right-hand operand before applying op
                   (requested by the small set difference code)
      mboverflow - the operation may overflow; if overflow checking is
                   enabled a conditional call to FPC_OVERFLOW is appended }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { the result now lives in the former right register: swap the
                locations and record the swap in the node flags }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before the
                instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, apply the operation
                there and copy the result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { (in)equality against zero: TEST reg,reg }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    { ADD reg,1 -> INC reg; guarded by UseIncDec exactly
                      like the DEC case below }
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    { SUB reg,1 -> DEC reg }
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { multiplication by 2^power -> SHL reg,power }
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 3, 5 or 9 -> LEA new,[reg+reg*(value-1)] }
                  reference_reset_base(href,left.location.register,0,0);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right, then AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the runtime error call when no overflow occurred }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Guarantees that left.location is a LOC_REGISTER and that both operands
    have the size (ignoring signedness) requested by opsize.

      opdef  - type the operands are converted to when forced into a register
      opsize - code generator size matching opdef
      noswap - when true the operand locations must not be exchanged }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { No swap possible: load left into a register.  A comparison
                does not modify its operand, so in that case a constant
                register may be reused. }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,
                nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
            end
          else
            begin
              { right already sits in a register: exchanging the locations
                is enough, remember the exchange in the node flags }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { bring a non-constant right operand to the operation size }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      { same for the left operand }
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into LOC_FPUREGISTER (right first, then left) and
    toggles nf_swapped whenever left was already in an FPU register. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      { right is loaded first when it is not in an FPU register yet }
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc<>LOC_FPUREGISTER then
        { left is loaded last }
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      else
        { left was already on the stack, so the operands are in the wrong
          order => record the swap }
        toggleflag(nf_swapped);
    end;
  { Emits "op right,left" where left.location is a register and right is a
    register, a memory reference or a constant.  Subset register/reference
    locations of right are first loaded into a plain register.

      op     - x86 opcode to emit
      opsize - operand size of the instruction }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}
    begin
      { subset locations cannot be used as an instruction operand directly }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only encodes sign-extended 32 bit immediates: a 64 bit
              constant outside that range goes through a scratch register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            { simplify the reference before using it as an operand }
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag condition that represents the node's comparison.

      unsigned - true selects the above/below conditions, false the
                 greater/less ones

    When nf_swapped is set the ordering conditions are mirrored.  Any node
    type other than a comparison raises an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      operands_swapped : boolean;
    begin
      { (in)equality depends neither on signedness nor on operand order }
      if nodetype=equaln then
        getresflags:=F_E
      else if nodetype=unequaln then
        getresflags:=F_NE
      else
        begin
          operands_swapped:=nf_swapped in flags;
          if unsigned then
            begin
              if operands_swapped then
                case nodetype of
                  ltn : getresflags:=F_A;
                  lten : getresflags:=F_AE;
                  gtn : getresflags:=F_B;
                  gten : getresflags:=F_BE;
                  else
                    internalerror(2013120107);
                end
              else
                case nodetype of
                  ltn : getresflags:=F_B;
                  lten : getresflags:=F_BE;
                  gtn : getresflags:=F_A;
                  gten : getresflags:=F_AE;
                  else
                    internalerror(2013120108);
                end;
            end
          else
            begin
              if operands_swapped then
                case nodetype of
                  ltn : getresflags:=F_G;
                  lten : getresflags:=F_GE;
                  gtn : getresflags:=F_L;
                  gten : getresflags:=F_LE;
                  else
                    internalerror(2013120105);
                end
              else
                case nodetype of
                  ltn : getresflags:=F_L;
                  lten : getresflags:=F_LE;
                  gtn : getresflags:=F_G;
                  gten : getresflags:=F_GE;
                  else
                    internalerror(2013120106);
                end;
            end;
        end;
    end;
  326. {*****************************************************************************
  327. AddSmallSet
  328. *****************************************************************************}
  329. {$ifndef i8086}
  { Generates code for operations on small sets: union / element inclusion,
    symmetric difference, intersection and difference.  The node's location
    is set to the register holding the result. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap : boolean;
      all_member_optimization:boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        setbase:=tsetdef(right.resultdef).setbase
      else
        setbase:=tsetdef(left.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              op:=A_OR
            else
              begin
                { including a single element: no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, so work on 32 bits instead }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                { swapped: right holds the minuend, left the subtrahend }
                all_member_optimization:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                { left holds the minuend, right the subtrahend }
                all_member_optimization:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not all_member_optimization then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { undo the swap so that right is the subtrahend again }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  432. {$endif not i8086}
  { Generates code for small set comparisons (=, <>, <=, >=).  The result
    is returned as a LOC_FLAGS location. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
    begin
      pass_left_right;

      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      if nodetype in [equaln,unequaln] then
        op:=A_CMP
      else if nodetype in [lten,gten] then
        begin
          { swap for an unswapped <= or a swapped >=; for these two node
            types that is exactly "swapped flag equals (node is >=)" }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          { AND right into left, then compare the result with right below }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
          emit_op_right_left(A_AND,opsize);
          op:=A_CMP;
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(op,opsize,true,false,false);

      { only the flags carry the result: both operands can be released }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  468. {*****************************************************************************
  469. AddMMX
  470. *****************************************************************************}
  471. {$ifdef SUPPORT_MMX}
  { Generates code for MMX vector operations: add, subtract, multiply and
    the bitwise xor/or/and.  The opcode is selected from the node type, the
    MMX element type of the left operand and, for add/subtract, the
    cs_mmx_saturation local switch.  The result is returned in an MMX
    register. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { signed saturated subtraction of 16 bit elements is
                      PSUBSW; PSUBSB would operate on bytes }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a fresh register and
                subtract left from it; that register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right.location.reference that is used below, so
                    right (not left, which is an MMX register here) must be
                    a memory reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in the former right register: swap back }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { cmpop is never set to true in this procedure }
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  640. {$endif SUPPORT_MMX}
  641. {*****************************************************************************
  642. AddFloat
  643. *****************************************************************************}
  { Generates SSE code for floating point +, -, * and /.  The result is
    returned in an MM register.

    With an fputype of at least fpu_sse3, sqr(a)+sqr(b) and sqr(a)-sqr(b)
    on vector-fpu types get a special path: both arguments are packed into
    one register, squared with a packed multiply and combined with a
    horizontal add/subtract. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
      sqr_sum : boolean;
      operand : tnode;
      both_on_fpustack : boolean;
    begin
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { strip the sqr() nodes: keep their arguments as the operands }
          operand:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=operand;

          operand:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=operand;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpustack:=(left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER);
      if both_on_fpustack then
        toggleflag(nf_swapped);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch }
          if both_on_fpustack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;
          if not is_double(resultdef) then
            begin
              { interleave the low single of right with the low single of left }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square every element }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
          else
            begin
              { pack left (low half) and right (high half) into one register }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              { square both halves }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an operand living in an fpu register is spilled to memory instead
            of being moved to an mm register: there is no direct fpu->mm
            register copy instruction, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { spill left to memory when it lives in an fpu register (no direct
            fpu->mm register copy instruction exists) ... }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          { ... and load it into the result register }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { same treatment for right before it is combined with the result }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_addfloatavx;
    { Emits a scalar floating point add/sub/mul/div (selected by nodetype)
      whose result location is a freshly reset LOC_MMREGISTER.  The operation
      is issued through the three-operand helpers a_opmm_reg_reg_reg and
      a_opmm_loc_reg_reg.  A multiplication by the real constant 2 is emitted
      as an addition of the other operand to itself.  The "sum of squares"
      path is only ever taken when the unit is compiled with "dummy" defined. }
    var
      cgop : topcg;
      sum_of_squares,
      both_on_fpustack : boolean;
      inner : tnode;
    begin
      sum_of_squares:=false;
{$ifdef dummy}
      { sqr(a)+sqr(b) or sqr(a)-sqr(b): strip both sqr() inline nodes and keep
        only their arguments as the new left/right operands }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          sum_of_squares:=true;
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;
          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      both_on_fpustack:=(left.location.loc=LOC_FPUREGISTER) and
                        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpustack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used when both operands are on the fpu
            stack: both are "R_ST", so nothing would change -> exchange the
            stack entries manually instead }
          if both_on_fpustack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sum_of_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built in place in the register holding left }
          location:=left.location;

          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..31 and 64..95 }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
        end
      { left*2 -> left+left }
      else if (nodetype=muln) and is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
        end
      { 2*right -> right+right }
      else if (nodetype=muln) and is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
        end
      { right may only take the place of the register operand if the
        operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand held in an fpu register is written to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { same reasoning as above: spill an fpu register operand to memory
            instead of forcing it into an mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_cmpfloatsse;
    { Emits a COMISS (single) or COMISD (double) comparison of left and right
      and leaves the result in the flags (LOC_FLAGS).  If right already sits
      in an mm register it is used as the register operand and nf_swapped is
      toggled to record that the operands were exchanged. }
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=A_COMISS
      else if is_double(left.resultdef) then
        cmpinstr:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right is compared against left here, i.e. with exchanged
            operands; this is accounted for by toggling nf_swapped below }

          { an operand held in an fpu register is written to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;

          { flip the swapped state }
          if not (nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);

          { see above: spill an fpu register operand to memory instead of
            forcing it into an mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          case right.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getresflags(true);
    end;
  procedure tx86addnode.second_cmpfloatavx;
    { Emits a VCOMISS (single) or VCOMISD (double) comparison of left and
      right and leaves the result in the flags (LOC_FLAGS).  If right already
      sits in an mm register it is used as the register operand and
      nf_swapped is toggled to record that the operands were exchanged. }
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=A_VCOMISS
      else if is_double(left.resultdef) then
        cmpinstr:=A_VCOMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right is compared against left here, i.e. with exchanged
            operands; this is accounted for by toggling nf_swapped below }

          { an operand held in an fpu register is written to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;

          { flip the swapped state }
          if not (nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

          { see above: spill an fpu register operand to memory instead of
            forcing it into an mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          case right.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getresflags(true);
    end;
  procedure tx86addnode.second_opvector;
    { Emits an add/sub/mul/div (selected by nodetype) on two vector operands
      that fit in an mm register; the result location is LOC_MMREGISTER and
      reuses the register of one of the operands.  Vectors that do not fit in
      an mm register are not supported and raise internalerror 200610072. }
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { The operand size is the size of one vector element.  left.resultdef
            is the array (vector) def itself, so the float type has to be read
            from its element def; casting the array def directly to tfloatdef
            would read an unrelated field. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  procedure tx86addnode.second_addfloat;
    { Floating point add/sub/mul/div.  When the result type is handled by the
      vector fpu the work is delegated to the AVX or SSE variant; otherwise
      an x87 "operate and pop" instruction on ST/ST1 is emitted and the
      result location is the fpu register NR_ST. }
    var
      fpuinstr : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn :
          fpuinstr:=A_FADDP;
        muln :
          fpuinstr:=A_FMULP;
        subn :
          fpuinstr:=A_FSUBP;
        slashn :
          fpuinstr:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      force_left_and_right_fpureg;

      { if the tree nodes were swapped, the non-commutative operations need
        the reverse operator }
      if nf_swapped in flags then
        begin
          if nodetype=slashn then
            fpuinstr:=A_FDIVRP
          else if nodetype=subn then
            fpuinstr:=A_FSUBRP;
        end;

      emit_reg_reg(fpuinstr,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  procedure tx86addnode.second_cmpfloat;
    { Floating point comparison; the result is left in the flags.  Operands
      handled by the vector fpu are delegated to the AVX or SSE variant.
      Otherwise both operands are forced onto the fpu stack and compared
      there: with FCOMPP plus a transfer of the fpu status word into the cpu
      flags for cpu types below Pentium2 (non-x86_64 builds only), with
      FCOMIP otherwise. }
{$ifdef i8086}
    var
      statusref: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          if not UseAVX then
            second_cmpfloatsse
          else
            second_cmpfloatavx;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word in a 2 byte temp and fetch its upper
                byte into AH for SAHF }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusref);
              emit_ref(A_FSTSW,S_NO,statusref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusref.offset);
              emit_ref_reg(A_MOV,S_B,statusref,NR_AH);
              dec(statusref.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusref);
            end
          else
{$endif i8086}
            begin
              { store the status word directly in AX }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so pop the other one explicitly }
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  1182. {*****************************************************************************
  1183. Add64bit
  1184. *****************************************************************************}
  procedure tx86addnode.second_add64bit;
    { 64 bit arithmetic: when compiled with cpu64bitalu defined this simply
      forwards to second_addordinal; otherwise this method raises
      internalerror 200402042. }
    begin
{$ifdef cpu64bitalu}
      second_addordinal;
{$else cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$endif cpu64bitalu}
    end;
  procedure tx86addnode.second_cmp64bit;
    { 64 bit comparison: when compiled with cpu64bitalu defined this simply
      forwards to second_cmpordinal; otherwise this method raises
      internalerror 200402043. }
    begin
{$ifdef cpu64bitalu}
      second_cmpordinal;
{$else cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$endif cpu64bitalu}
    end;
  1203. {*****************************************************************************
  1204. AddOrdinal
  1205. *****************************************************************************}
  procedure tx86addnode.second_cmpordinal;
    { Ordinal comparison; the result is left in the flags.  A constant right
      operand is compared directly against a left operand in memory
      (CMP const,ref); on x86_64 this is only done for 64 bit operands if the
      constant lies within the longint range.  In all other cases left is
      forced into a register and the generic code emitter is used.  The
      comparison counts as unsigned as soon as one operand is not signed. }
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));

      pass_left_right;

      if (right.location.loc=LOC_CONSTANT) and
         (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
         and ((not (cmpsize in [OS_64,OS_S64])) or (
              (right.location.value>=low(longint)) and (right.location.value<=high(longint))
            ))
{$endif x86_64}
      then
        emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference)
      else
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;

      { left is released on both paths }
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  begin
    { unit initialization: make tx86addnode the class stored in caddnode }
    caddnode:=tx86addnode;
  end.