nx86add.pas 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add node with x86-specific second-pass code generation; extends the
    generic code generator add node tcgaddnode. }
  tx86addnode = class(tcgaddnode)
  protected
    { maps the node type (and the swapped flag) to the CPU flags to test
      after an integer comparison }
    function getresflags(unsigned : boolean) : tresflags;
    { same mapping, using the flag set meant for floating point compares }
    function getfpuresflags : tresflags;
    { makes sure left.location ends up in an integer register }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { loads both operands onto the x87 stack }
    procedure force_left_and_right_fpureg;
    { loads the operands for an x87 instruction; one operand may be
      handed back in refnode as a memory reference }
    procedure prepare_x87_locations(out refnode: tnode);
    { emits "op right,left" for whatever location kind right has }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer operation itself plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  53. implementation
  54. uses
  55. globtype,globals,
  56. verbose,cutils,
  57. cpuinfo,
  58. aasmbase,aasmtai,aasmdata,aasmcpu,
  59. symconst,symdef,
  60. cgobj,hlcgobj,cgx86,cga,cgutils,
  61. paramgr,tgobj,ncgutil,
  62. ncon,nset,ninl,
  63. defutil;
  64. {*****************************************************************************
  65. Helpers
  66. *****************************************************************************}
  {
    Emits the integer instruction "op" for this node. On entry
    left.location is expected to be a LOC_REGISTER; it is the destination
    of the operation (after a possible location swap).

      op         - instruction to emit
      opsize     - operand size of the operation
      unsigned   - selects the flag tested by the overflow check
                   (F_AE when true, F_NO when false)
      extra_not  - complement one operand with NOT before combining; in
                   the non-register path the combining instruction is
                   hard-wired to AND
      mboverflow - when true and cs_check_overflow is active, a jump
                   around a call to FPC_OVERFLOW is emitted

    When right is a constant several shorter encodings are used:
    TEST for a compare against 0, INC/DEC for +1/-1, SHL for a multiply
    by a power of two and LEA for a multiply by 3, 5 or 9.  INC and DEC
    are both only chosen when UseIncDec allows it.
  }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these operations the locations are exchanged first, so
                the former right register receives the result }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate on it and copy
                the result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   { same policy as for DEC below }
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                   (power in [1..3]) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { x*(2^power+1) = x + x*2^power: a single LEA with base
                      and scaled index both set to the left register }
                    reference_reset_base(href,left.location.register,0,0);
                    href.index:=left.location.register;
                    href.scalefactor:=int64(right.location.value)-1;
                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                  end
              else
                begin
                  if extra_not then
                    begin
                      { left := left and not(right), via a scratch register }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  {
    Brings left.location into an integer register.

    If left is not a register yet, it is either exchanged with right
    (allowed only when noswap is false and right already is a register;
    nf_swapped is toggled in that case) or loaded into a register of
    type opdef.  Afterwards any non-constant operand whose size differs
    from opsize (ignoring signedness) is forced into a register of type
    opdef as well.
  }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_compare : boolean;
      wanted : TCGSize;
    begin
      { a comparison does not modify its operands, so a constant register
        may be reused for left }
      is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
      wanted:=tcgsize2unsigned[opsize];

      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare)
          else
            begin
              { right is a register already: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>wanted) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>wanted) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  {
    Loads both operands into LOC_FPUREGISTER.  right is loaded first when
    it is not there yet; then left is loaded, or - when left already was
    an FPU register - nf_swapped is toggled because the operands are in
    the opposite order on the stack.
  }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      else
        { left was on the stack already => operands are in the wrong order }
        toggleflag(nf_swapped);
    end;
  {
    Prepares both operands for an x87 instruction.

    An operand that is an OS_F32/OS_F64-sized LOC_REFERENCE or
    LOC_CREFERENCE may stay in memory; that node is then returned in
    'refnode'.  Every other operand is loaded onto the FPU stack and
    'refnode' is nil if both end up there.  nf_swapped is toggled as
    needed to record the resulting operand order.
  }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_stack : integer;
    begin
      refnode:=nil;

      { count the operands that already are FPU registers }
      on_stack:=0;
      if left.location.loc=LOC_FPUREGISTER then
        inc(on_stack);
      if right.location.loc=LOC_FPUREGISTER then
        inc(on_stack);

      case on_stack of
        0:
          begin
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                { left can be used as memory operand }
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the operand that is not on the stack is the candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { unsuitable size for a memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  {
    Emits "op right,left" with left.location.register as destination.

    right may be a register, a memory reference or a constant; subset
    locations are first loaded into a register.  On x86_64 a 64 bit
    constant outside the longint range is loaded into a temporary
    register first.  Any other location kind raises internalerror
    200203232.
  }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      list : TAsmList;
      instrsize : topsize;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
    begin
      list:=current_asmdata.CurrAsmList;
      instrsize:=TCGSize2Opsize[opsize];

      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(list,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          list.concat(taicpu.op_reg_reg(op,instrsize,right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(list,right.location.reference);
            list.concat(taicpu.op_ref_reg(op,instrsize,right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(list,opsize);
                cg.a_load_const_reg(list,opsize,right.location.value,tmpreg);
                list.concat(taicpu.op_reg_reg(op,instrsize,tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              list.concat(taicpu.op_const_reg(op,instrsize,right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  {
    Returns the flags condition that represents the outcome of this
    comparison node after an integer compare.

    equaln/unequaln map to F_E/F_NE.  For the ordering comparisons the
    signed (F_L,F_LE,F_G,F_GE) or unsigned (F_B,F_BE,F_A,F_AE) condition
    is chosen according to 'unsigned', and the condition is mirrored when
    nf_swapped is set.  Any other node type raises an internal error.
  }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else if unsigned then
        begin
          if swapped then
            case nodetype of
              ltn  : result:=F_A;
              lten : result:=F_AE;
              gtn  : result:=F_B;
              gten : result:=F_BE;
              else
                internalerror(2013120107);
            end
          else
            case nodetype of
              ltn  : result:=F_B;
              lten : result:=F_BE;
              gtn  : result:=F_A;
              gten : result:=F_AE;
              else
                internalerror(2013120108);
            end;
        end
      else
        begin
          if swapped then
            case nodetype of
              ltn  : result:=F_G;
              lten : result:=F_GE;
              gtn  : result:=F_L;
              gten : result:=F_LE;
              else
                internalerror(2013120105);
            end
          else
            case nodetype of
              ltn  : result:=F_L;
              lten : result:=F_LE;
              gtn  : result:=F_G;
              gten : result:=F_GE;
              else
                internalerror(2013120106);
            end;
        end;
    end;
  {
    Returns the flags condition for this comparison node, using the
    F_F* conditions.

    equaln/unequaln map to F_FE/F_FNE; the ordering comparisons map to
    F_FB, F_FBE, F_FA and F_FAE, mirrored when nf_swapped is set.  Any
    other node type raises an internal error.
  }
  function tx86addnode.getfpuresflags : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          begin
            if swapped then
              result:=F_FA
            else
              result:=F_FB;
          end;
        lten :
          begin
            if swapped then
              result:=F_FAE
            else
              result:=F_FBE;
          end;
        gtn :
          begin
            if swapped then
              result:=F_FB
            else
              result:=F_FA;
          end;
        gten :
          begin
            if swapped then
              result:=F_FBE
            else
              result:=F_FAE;
          end;
        else
          begin
            if swapped then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  399. {*****************************************************************************
  400. AddSmallSet
  401. *****************************************************************************}
  402. {$ifndef i8086}
  403. procedure tx86addnode.second_addsmallset;
  404. var
  405. setbase : aint;
  406. opdef : tdef;
  407. opsize : TCGSize;
  408. op : TAsmOp;
  409. extra_not,
  410. noswap : boolean;
  411. all_member_optimization:boolean;
  412. begin
  413. pass_left_right;
  414. noswap:=false;
  415. extra_not:=false;
  416. all_member_optimization:=false;
  417. opdef:=resultdef;
  418. opsize:=int_cgsize(opdef.size);
  419. if (left.resultdef.typ=setdef) then
  420. setbase:=tsetdef(left.resultdef).setbase
  421. else
  422. setbase:=tsetdef(right.resultdef).setbase;
  423. case nodetype of
  424. addn :
  425. begin
  426. { adding elements is not commutative }
  427. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  428. swapleftright;
  429. { are we adding set elements ? }
  430. if right.nodetype=setelementn then
  431. begin
  432. { no range support for smallsets! }
  433. if assigned(tsetelementnode(right).right) then
  434. internalerror(43244);
  435. { btsb isn't supported }
  436. if opsize=OS_8 then
  437. begin
  438. opsize:=OS_32;
  439. opdef:=u32inttype;
  440. end;
  441. { bts requires both elements to be registers }
  442. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  443. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
  444. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
  445. op:=A_BTS;
  446. noswap:=true;
  447. end
  448. else
  449. op:=A_OR;
  450. end;
  451. symdifn :
  452. op:=A_XOR;
  453. muln :
  454. op:=A_AND;
  455. subn :
  456. begin
  457. op:=A_AND;
  458. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  459. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  460. all_member_optimization:=true;
  461. if (not(nf_swapped in flags)) and
  462. (right.location.loc=LOC_CONSTANT) then
  463. right.location.value := not(right.location.value)
  464. else if (nf_swapped in flags) and
  465. (left.location.loc=LOC_CONSTANT) then
  466. left.location.value := not(left.location.value)
  467. else
  468. extra_not:=true;
  469. end;
  470. xorn :
  471. op:=A_XOR;
  472. orn :
  473. op:=A_OR;
  474. andn :
  475. op:=A_AND;
  476. else
  477. internalerror(2003042215);
  478. end;
  479. if all_member_optimization then
  480. begin
  481. {A set expression [0..31]-x can be implemented with a simple NOT.}
  482. if nf_swapped in flags then
  483. begin
  484. { newly swapped also set swapped flag }
  485. location_swap(left.location,right.location);
  486. toggleflag(nf_swapped);
  487. end;
  488. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
  489. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  490. location:=right.location;
  491. end
  492. else
  493. begin
  494. { left must be a register }
  495. left_must_be_reg(opdef,opsize,noswap);
  496. emit_generic_code(op,opsize,true,extra_not,false);
  497. location_freetemp(current_asmdata.CurrAsmList,right.location);
  498. { left is always a register and contains the result }
  499. location:=left.location;
  500. end;
  501. { fix the changed opsize we did above because of the missing btsb }
  502. if opsize<>int_cgsize(resultdef.size) then
  503. hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
  504. end;
  505. {$endif not i8086}
  {
    Generates code for a small set comparison; the result is delivered
    in the CPU flags (LOC_FLAGS).

    =/<> are a plain CMP.  <=/>= (subset tests) are done by first ANDing
    the operands and then comparing the result, which reduces them to an
    equality test.  Other node types raise internalerror 2003042215.
  }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,
        gten :
          begin
            { exchange the operands for an unswapped <= or a swapped >= }
            if (nodetype=lten) xor (nf_swapped in flags) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(op,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  541. {*****************************************************************************
  542. AddMMX
  543. *****************************************************************************}
  544. {$ifdef SUPPORT_MMX}
  545. procedure tx86addnode.second_opmmx;
  546. var
  547. op : TAsmOp;
  548. cmpop : boolean;
  549. mmxbase : tmmxtype;
  550. hreg,
  551. hregister : tregister;
  552. begin
  553. pass_left_right;
  554. cmpop:=false;
  555. mmxbase:=mmx_type(left.resultdef);
  556. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  557. case nodetype of
  558. addn :
  559. begin
  560. if (cs_mmx_saturation in current_settings.localswitches) then
  561. begin
  562. case mmxbase of
  563. mmxs8bit:
  564. op:=A_PADDSB;
  565. mmxu8bit:
  566. op:=A_PADDUSB;
  567. mmxs16bit,mmxfixed16:
  568. op:=A_PADDSW;
  569. mmxu16bit:
  570. op:=A_PADDUSW;
  571. end;
  572. end
  573. else
  574. begin
  575. case mmxbase of
  576. mmxs8bit,mmxu8bit:
  577. op:=A_PADDB;
  578. mmxs16bit,mmxu16bit,mmxfixed16:
  579. op:=A_PADDW;
  580. mmxs32bit,mmxu32bit:
  581. op:=A_PADDD;
  582. end;
  583. end;
  584. end;
  585. muln :
  586. begin
  587. case mmxbase of
  588. mmxs16bit,mmxu16bit:
  589. op:=A_PMULLW;
  590. mmxfixed16:
  591. op:=A_PMULHW;
  592. end;
  593. end;
  594. subn :
  595. begin
  596. if (cs_mmx_saturation in current_settings.localswitches) then
  597. begin
  598. case mmxbase of
  599. mmxs8bit:
  600. op:=A_PSUBSB;
  601. mmxu8bit:
  602. op:=A_PSUBUSB;
  603. mmxs16bit,mmxfixed16:
  604. op:=A_PSUBSB;
  605. mmxu16bit:
  606. op:=A_PSUBUSW;
  607. end;
  608. end
  609. else
  610. begin
  611. case mmxbase of
  612. mmxs8bit,mmxu8bit:
  613. op:=A_PSUBB;
  614. mmxs16bit,mmxu16bit,mmxfixed16:
  615. op:=A_PSUBW;
  616. mmxs32bit,mmxu32bit:
  617. op:=A_PSUBD;
  618. end;
  619. end;
  620. end;
  621. xorn:
  622. op:=A_PXOR;
  623. orn:
  624. op:=A_POR;
  625. andn:
  626. op:=A_PAND;
  627. else
  628. internalerror(2003042214);
  629. end;
  630. { left and right no register? }
  631. { then one must be demanded }
  632. if (left.location.loc<>LOC_MMXREGISTER) then
  633. begin
  634. if (right.location.loc=LOC_MMXREGISTER) then
  635. begin
  636. location_swap(left.location,right.location);
  637. toggleflag(nf_swapped);
  638. end
  639. else
  640. begin
  641. { register variable ? }
  642. if (left.location.loc=LOC_CMMXREGISTER) then
  643. begin
  644. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  645. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  646. end
  647. else
  648. begin
  649. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  650. internalerror(200203245);
  651. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  652. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  653. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  654. end;
  655. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  656. left.location.register:=hregister;
  657. end;
  658. end;
  659. { at this point, left.location.loc should be LOC_MMXREGISTER }
  660. if right.location.loc<>LOC_MMXREGISTER then
  661. begin
  662. if (nodetype=subn) and (nf_swapped in flags) then
  663. begin
  664. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  665. if right.location.loc=LOC_CMMXREGISTER then
  666. begin
  667. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  668. emit_reg_reg(op,S_NO,left.location.register,hreg);
  669. end
  670. else
  671. begin
  672. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  673. internalerror(200203247);
  674. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  675. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  676. emit_reg_reg(op,S_NO,left.location.register,hreg);
  677. end;
  678. location.register:=hreg;
  679. end
  680. else
  681. begin
  682. if (right.location.loc=LOC_CMMXREGISTER) then
  683. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  684. else
  685. begin
  686. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  687. internalerror(200203246);
  688. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  689. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  690. end;
  691. location.register:=left.location.register;
  692. end;
  693. end
  694. else
  695. begin
  696. { right.location=LOC_MMXREGISTER }
  697. if (nodetype=subn) and (nf_swapped in flags) then
  698. begin
  699. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  700. location_swap(left.location,right.location);
  701. toggleflag(nf_swapped);
  702. end
  703. else
  704. begin
  705. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  706. end;
  707. location.register:=left.location.register;
  708. end;
  709. location_freetemp(current_asmdata.CurrAsmList,right.location);
  710. if cmpop then
  711. location_freetemp(current_asmdata.CurrAsmList,left.location);
  712. end;
  713. {$endif SUPPORT_MMX}
  714. {*****************************************************************************
  715. AddFloat
  716. *****************************************************************************}
  717. procedure tx86addnode.second_addfloatsse;
  718. var
  719. op : topcg;
  720. sqr_sum : boolean;
  721. tmp : tnode;
  722. begin
  723. sqr_sum:=false;
  724. if (current_settings.fputype>=fpu_sse3) and
  725. use_vectorfpu(resultdef) and
  726. (nodetype in [addn,subn]) and
  727. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  728. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  729. begin
  730. sqr_sum:=true;
  731. tmp:=tinlinenode(left).left;
  732. tinlinenode(left).left:=nil;
  733. left.free;
  734. left:=tmp;
  735. tmp:=tinlinenode(right).left;
  736. tinlinenode(right).left:=nil;
  737. right.free;
  738. right:=tmp;
  739. end;
  740. pass_left_right;
  741. { fpu operands are always in reversed order on the stack }
  742. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  743. toggleflag(nf_swapped);
  744. if (nf_swapped in flags) then
  745. { can't use swapleftright if both are on the fpu stack, since then }
  746. { both are "R_ST" -> nothing would change -> manually switch }
  747. if (left.location.loc = LOC_FPUREGISTER) and
  748. (right.location.loc = LOC_FPUREGISTER) then
  749. emit_none(A_FXCH,S_NO)
  750. else
  751. swapleftright;
  752. case nodetype of
  753. addn :
  754. op:=OP_ADD;
  755. muln :
  756. op:=OP_MUL;
  757. subn :
  758. op:=OP_SUB;
  759. slashn :
  760. op:=OP_DIV;
  761. else
  762. internalerror(200312231);
  763. end;
  764. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  765. if sqr_sum then
  766. begin
  767. if nf_swapped in flags then
  768. swapleftright;
  769. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  770. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  771. location:=left.location;
  772. if is_double(resultdef) then
  773. begin
  774. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  775. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  776. case nodetype of
  777. addn:
  778. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  779. subn:
  780. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  781. else
  782. internalerror(201108162);
  783. end;
  784. end
  785. else
  786. begin
  787. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  788. { ensure that bits 64..127 contain valid values }
  789. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  790. { the data is now in bits 0..32 and 64..95 }
  791. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  792. case nodetype of
  793. addn:
  794. begin
  795. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  796. end;
  797. subn:
  798. begin
  799. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  800. end;
  801. else
  802. internalerror(201108163);
  803. end;
  804. end
  805. end
  806. { we can use only right as left operand if the operation is commutative }
  807. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  808. begin
  809. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  810. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
  811. { force floating point reg. location to be written to memory,
  812. we don't force it to mm register because writing to memory
  813. allows probably shorter code because there is no direct fpu->mm register
  814. copy instruction
  815. }
  816. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  817. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  818. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  819. end
  820. else
  821. begin
  822. if nf_swapped in flags then
  823. swapleftright;
  824. { force floating point reg. location to be written to memory,
  825. we don't force it to mm register because writing to memory
  826. allows probably shorter code because there is no direct fpu->mm register
  827. copy instruction
  828. }
  829. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  830. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  831. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  832. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  833. { force floating point reg. location to be written to memory,
  834. we don't force it to mm register because writing to memory
  835. allows probably shorter code because there is no direct fpu->mm register
  836. copy instruction
  837. }
  838. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  839. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  840. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  841. end;
  842. end;
  { Generates code for a scalar floating point add/sub/mul/div whose result
    location is LOC_MMREGISTER, using the code generator entry points that
    take separate source and destination operands (a_opmm_reg_reg_reg and
    a_opmm_loc_reg_reg).

    Raises internalerror(200312231) for any other node type. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop              : topcg;
      is_sqr_sum,
      both_on_fpu_stack : boolean;
      operand           : tnode;
    begin
      is_sqr_sum:=false;
{$ifdef dummy}
      { sqr(a)+sqr(b) or sqr(a)-sqr(b): detach the arguments of both sqr()
        inline nodes and make them the direct operands of this node; the
        squaring is done further down with a packed multiply }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          is_sqr_sum:=true;

          operand:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=operand;

          operand:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=operand;
        end;
{$endif dummy}

      pass_left_right;

      { operands that are both on the fpu stack are in reversed order there }
      both_on_fpu_stack:=(left.location.loc=LOC_FPUREGISTER) and
                         (right.location.loc=LOC_FPUREGISTER);
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands are "R_ST": nothing
            would change, so exchange the stack entries by hand instead }
          if both_on_fpu_stack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { translate the node type into the generic operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if is_sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;

          if is_double(resultdef) then
            begin
              { combine both operands in the result register, square them
                with one packed multiply and add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { make sure bits 64..127 hold valid values as well }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all elements, then add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
        end
      { left*2: emitted as left+left }
      else if (nodetype=muln) and
              is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,
            left.location.register,
            location.register,
            mms_movescalar);
        end
      { 2*right: emitted as right+right }
      else if (nodetype=muln) and
              is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      { right may only take the role of the register operand when the
        operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand living in an fpu register is spilled to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          { general case: undo the swap, left becomes the register operand }
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { same reasoning as above: fpu register operands go through memory
            because no direct fpu->mm register copy instruction exists }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Compares two single or double precision operands with (V)COMISS/(V)COMISD
    and leaves the result in the cpu flags (location becomes LOC_FLAGS).

    Raises internalerror(200402222) if left is neither single nor double, and
    internalerror(200402221)/(200402223) for operand locations that cannot be
    used directly by the compare instruction. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { both tables are indexed by UseAVX }
      single_cmp_ops: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      double_cmp_ops: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
    var
      cmpop : tasmop;
    begin
      { pick the compare instruction from the type of the left operand }
      if is_single(left.resultdef) then
        cmpop:=single_cmp_ops[UseAVX]
      else if is_double(left.resultdef) then
        cmpop:=double_cmp_ops[UseAVX]
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { There is no direct fpu->mm register move, so operands held in fpu
        registers are written to memory. They are deliberately not forced
        into mm registers: one memory operand can be fed straight into the
        compare instruction, which yields shorter code. }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right already sits in an mm register: use it as the register
            operand and record that the operands were exchanged }
          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swapped);
        end
      else
        begin
          { otherwise left is loaded into an mm register and right is used
            from wherever it lives }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          case right.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for add/sub/mul/div on operands handled as vectors that
    fit into a single MM register; the result location is LOC_MMREGISTER.

    Raises internalerror(200610071) for any other node type and
    internalerror(200610072) when the operand type does not fit into an MM
    register (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          { The operation size is the size of one vector element. It has to
            be taken from the element def of the array in both branches
            below: casting the array def itself to tfloatdef (as the
            commutative branch used to do) is an unchecked cast that reads an
            unrelated field.
            NOTE(review): assumes fits_in_mm_register only accepts array defs
            with a float element type - confirm against its implementation. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div. Types handled by the
    vector fpu are passed on to second_addfloatavx or second_addfloatsse;
    everything else is computed on the x87 stack and the result is left in
    NR_ST (LOC_FPUREGISTER).

    Raises internalerror(2003042214) for any other node type. }
  procedure tx86addnode.second_addfloat;
    const
      { index false: register form that pops the stack,
        index true : form taking a memory operand }
      add_ops  : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ops  : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_ops  : array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      rsub_ops : array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      div_ops  : array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      rdiv_ops : array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      fpuop      : TAsmOp;
      memoperand : tnode;
      use_mem,
      reversed   : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memoperand);
      use_mem:=assigned(memoperand);
      { read the flag only now, after the operand locations were prepared }
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          fpuop:=add_ops[use_mem];
        muln :
          fpuop:=mul_ops[use_mem];
        subn :
          { non-commutative: swapped operands need the reversed instruction }
          if reversed then
            fpuop:=rsub_ops[use_mem]
          else
            fpuop:=sub_ops[use_mem];
        slashn :
          if reversed then
            fpuop:=rdiv_ops[use_mem]
          else
            fpuop:=div_ops[use_mem];
        else
          internalerror(2003042214);
      end;

      if not use_mem then
        begin
          { both operands are on the stack; the popping form removes one }
          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(fpuop,tcgsize2opsize[memoperand.location.size],memoperand.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Compares two floating point operands and leaves the result in the cpu
    flags (LOC_FLAGS). Operands handled by the vector fpu are delegated to
    second_cmpfloatvector; otherwise both operands are forced onto the x87
    stack and compared there. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusref: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands, then move the fpu status into the
            cpu flags by hand }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word into a 2 byte temp and load its high
                byte into AH (presumably because the register form used
                below is not available on these cpus) }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusref);
              emit_ref(A_FSTSW,S_NO,statusref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusref.offset);
              emit_ref_reg(A_MOV,S_B,statusref,NR_AH);
              dec(statusref.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusref);
            end
          else
{$endif i8086}
            begin
              { status word -> AX, then AH -> cpu flags }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip removes just one entry from the fpu stack, so the other
            one is popped explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1197. {*****************************************************************************
  1198. Add64bit
  1199. *****************************************************************************}
  { 64 bit add/sub/...: when cpu64bitalu is defined this is simply the
    ordinal code path; otherwise it raises internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { has to be implemented separately }
      internalerror(200402042);
{$else not cpu64bitalu}
      second_addordinal;
{$endif not cpu64bitalu}
    end;
  { 64 bit comparison: when cpu64bitalu is defined this is simply the ordinal
    compare path; otherwise it raises internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { has to be implemented separately }
      internalerror(200402043);
{$else not cpu64bitalu}
      second_cmpordinal;
{$endif not cpu64bitalu}
    end;
  1218. {*****************************************************************************
  1219. AddOrdinal
  1220. *****************************************************************************}
  { Compares two ordinal operands with CMP and leaves the result in the cpu
    flags (LOC_FLAGS). The comparison is treated as unsigned as soon as one
    of the two operand types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef      : tdef;
      cmpsize     : tcgsize;
      is_unsigned,
      imm_vs_mem  : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      { a constant on the right can be compared directly against a left
        operand in memory ... }
      imm_vs_mem:=(right.location.loc=LOC_CONSTANT) and
                  (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
{$ifdef x86_64}
      { ... but for 64 bit operands only when the constant lies within the
        longint range }
      if imm_vs_mem and (cmpsize in [OS_64,OS_S64]) then
        imm_vs_mem:=(right.location.value>=low(longint)) and
                    (right.location.value<=high(longint));
{$endif x86_64}

      if imm_vs_mem then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: point caddnode at the x86 implementation
    (presumably the class reference used to instantiate add nodes -
    confirm against its declaration) }
  begin
    caddnode:=tx86addnode;
  end.