nx86add.pas 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add/compare node shared by the i386 and x86 code generators; it derives
    from the generic tcgaddnode and overrides its second_* pass-2 handlers. }
  tx86addnode = class(tcgaddnode)
  protected
    { --- operand preparation --- }
    { makes sure the left operand lives in an integer register }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { sorts out x87 stack operands and records an operand swap if needed }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);

    { --- instruction emission --- }
    { emits "op right,left" for whatever location right is in }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer operation plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { translates the comparison node type into a flags condition }
    function getresflags(unsigned : boolean) : tresflags;

    { --- SSE/AVX floating point back ends --- }
    procedure second_addfloatsse;
    procedure second_addfloatavx;
    procedure second_cmpfloatsse;
    procedure second_cmpfloatavx;
  public
    { arithmetic }
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;

    { comparisons }
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
  end;
  52. implementation
  53. uses
  54. globtype,globals,
  55. verbose,cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmdata,aasmcpu,
  58. symconst,symdef,
  59. cgobj,hlcgobj,cgx86,cga,cgutils,
  60. paramgr,tgobj,ncgutil,
  61. ncon,nset,ninl,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  { Emits the integer instruction for this node. The caller must already have
    placed the left operand in a register; on every path that produces a value
    the result ends up in left.location.register.

      op          x86 opcode to emit
      opsize      operand size of the operation
      unsigned    selects the overflow test: F_AE (no carry) skips the trap
                  when true, F_NO (no signed overflow) when false
      extra_not   complement one operand with NOT first (used together with
                  AND by the set-difference code)
      mboverflow  the operation may overflow; a call to FPC_OVERFLOW is
                  emitted when overflow checking is enabled

    Constant right operands get cheaper encodings where possible:
    CMP x,0 -> TEST, ADD/SUB x,1 -> INC/DEC, IMUL by 2^n -> SHL and
    IMUL by 3, 5 or 9 -> LEA. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              { the result is computed in the right register ... }
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { ... so exchange the locations to make left hold the result
                again; newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before the
                instruction is emitted; the swapped flag is not touched }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute right-left in a scratch register, then copy the
                result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { (in)equality against zero: TEST reg,reg }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   { honour the same INC/DEC preference as the SUB case below }
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { multiplication by 2^power becomes a left shift }
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                   (power in [1..3]) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { multiplier is 3, 5 or 9: LEA new,[reg+reg*(multiplier-1)];
                      the result goes into a freshly allocated register }
                    reference_reset_base(href,left.location.register,0,0);
                    href.index:=left.location.register;
                    href.scalefactor:=int64(right.location.value)-1;
                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                  end
              else
                begin
                  if extra_not then
                    begin
                      { right must stay untouched: complement a copy and AND
                        it into the left register }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { jump over the runtime error call when no overflow happened }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Ensures the left operand is held in a register of the operation size.

      opdef   type the operands are converted to when they are loaded
      opsize  code generator size of the operation
      noswap  when true the operands may not be exchanged, even if the right
              one already sits in a register }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      list : TAsmList;
      wanted : TCGSize;
      is_compare : boolean;
    begin
      list:=current_asmdata.CurrAsmList;

      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not modify its operand, so a register
                holding a constant may be reused in that case }
              is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(list,left.location,left.resultdef,opdef,is_compare);
            end
          else
            begin
              { right is already a register: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { bring non-constant operands of a different width to the operation size }
      wanted:=tcgsize2unsigned[opsize];
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>wanted) then
        hlcg.location_force_reg(list,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>wanted) then
        hlcg.location_force_reg(list,left.location,left.resultdef,opdef,false);
    end;
  { Prepares the operands of an x87 operation.

    When force_fpureg is set, any operand that is not yet in an FPU register
    is loaded into one (right first, then left). nf_swapped is toggled when
    left was already in an FPU register while right either was there as well
    or has just been loaded. Without force_fpureg nothing is loaded and the
    flag is only toggled when both operands are already FPU registers. }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    var
      left_on_stack,
      right_on_stack : boolean;
    begin
      left_on_stack:=left.location.loc=LOC_FPUREGISTER;
      right_on_stack:=right.location.loc=LOC_FPUREGISTER;

      if left_on_stack and right_on_stack then
        { fpu operands are always in the wrong order on the stack }
        toggleflag(nf_swapped)
      else if force_fpureg then
        begin
          if not right_on_stack then
            location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if not left_on_stack then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
          else
            { left was on the stack => swap }
            toggleflag(nf_swapped);
        end;
    end;
  { Emits "op right,left" with left.location.register as destination.
    The right operand may be a register, a memory reference or a constant;
    subset locations are loaded into a register first. Any other location
    kind raises internalerror(200203232). }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      list : TAsmList;
      asmsize : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      list:=current_asmdata.CurrAsmList;
      asmsize:=TCGSize2Opsize[opsize];

      { subset locations cannot be used as an instruction operand directly }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(list,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          list.concat(taicpu.op_reg_reg(op,asmsize,right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(list,right.location.reference);
            list.concat(taicpu.op_ref_reg(op,asmsize,right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly, wider
              values go through a scratch register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(list,opsize);
                cg.a_load_const_reg(list,opsize,right.location.value,constreg);
                list.concat(taicpu.op_reg_reg(op,asmsize,constreg,left.location.register));
              end
            else
{$endif x86_64}
              list.concat(taicpu.op_const_reg(op,asmsize,right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flags condition that represents the comparison of this node.

      unsigned  true selects the above/below conditions, false the
                greater/less ones

    When nf_swapped is set the comparison is mirrored (< becomes >, <= becomes
    >=). The result is only assigned for equaln, unequaln, ltn, lten, gtn and
    gten. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      if nodetype=equaln then
        getresflags:=F_E
      else if nodetype=unequaln then
        getresflags:=F_NE
      else
        begin
          { mirror the comparison first if the operands were exchanged }
          cmp:=nodetype;
          if nf_swapped in flags then
            begin
              case nodetype of
                ltn  : cmp:=gtn;
                lten : cmp:=gten;
                gtn  : cmp:=ltn;
                gten : cmp:=lten;
              end;
            end;

          if unsigned then
            begin
              case cmp of
                ltn  : getresflags:=F_B;
                lten : getresflags:=F_BE;
                gtn  : getresflags:=F_A;
                gten : getresflags:=F_AE;
              end;
            end
          else
            begin
              case cmp of
                ltn  : getresflags:=F_L;
                lten : getresflags:=F_LE;
                gtn  : getresflags:=F_G;
                gten : getresflags:=F_GE;
              end;
            end;
        end;
    end;
  322. {*****************************************************************************
  323. AddSmallSet
  324. *****************************************************************************}
  325. {$ifndef i8086}
  { Generates code for operations on small sets (sets held in an integer
    register): union/include (addn), symmetric difference (symdifn),
    intersection (muln), difference (subn) and the plain xorn/orn/andn
    bit operations. The resulting location is a register. }
  procedure tx86addnode.second_addsmallset;
    var
      list : TAsmList;
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      invert_operand,
      keep_order,
      complement_only : boolean;
    begin
      pass_left_right;
      list:=current_asmdata.CurrAsmList;

      keep_order:=false;
      invert_operand:=false;
      complement_only:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { the set base comes from whichever operand is the set }
      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { a single element is added with BTS }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(list,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(list,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(list,right.location,setbase);
                op:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                { operands are exchanged: right is the minuend, left the
                  subtrahend }
                if (right.location.loc=LOC_CONSTANT) and (right.location.value=-1) then
                  complement_only:=true;
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                if (left.location.loc=LOC_CONSTANT) and (left.location.value=-1) then
                  complement_only:=true;
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if complement_only then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(list,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,keep_order);
          emit_generic_code(op,opsize,true,invert_operand,false);
          location_freetemp(list,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(list,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  428. {$endif not i8086}
  { Generates code for comparing two small sets; the result is returned as
    CPU flags (LOC_FLAGS).

    Equality tests are a plain CMP. For the subset tests (lten/gten) the
    operands are arranged as needed, AND-ed together, and the AND result is
    then compared for equality against the right operand. }
  procedure tx86addnode.second_cmpsmallset;
    var
      list : TAsmList;
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      list:=current_asmdata.CurrAsmList;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          { nothing to prepare, a CMP is all that is needed }
          ;
        lten,gten:
          begin
            { exchange the operands for an unswapped <= or a swapped >= }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(list,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(list,right.location);
      location_freetemp(list,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  464. {*****************************************************************************
  465. AddMMX
  466. *****************************************************************************}
  467. {$ifdef SUPPORT_MMX}
  { Generates code for MMX vector operations (addn, muln, subn, xorn, orn,
    andn). The opcode is chosen from the MMX element type of the left operand
    and, for addn/subn, from the saturation switch. The result is placed in an
    MMX register.

    NOTE(review): the element-type case statements have no else branch, so
    "op" stays unassigned for combinations not listed (e.g. saturated 32 bit
    add) - presumably those are rejected before this pass; confirm. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { signed saturating subtraction of words, mirroring
                      A_PADDSW in the addn case (was A_PSUBSB, the byte
                      variant) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  { copy it, the variable itself must not be modified }
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand that is read from memory here;
                    left is already an MMX register, so testing left (as the
                    code did before) always raised the internal error }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { compute into the right register, then exchange the locations
                so that left holds the result }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { cmpop is never set to true in this routine, so the left temp is
        currently not released here }
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  636. {$endif SUPPORT_MMX}
  637. {*****************************************************************************
  638. AddFloat
  639. *****************************************************************************}
  { Generates SSE code for floating point add, subtract, multiply and divide;
    the result is returned in an MM register.

    With SSE3 or better, sqr(a)+sqr(b) and sqr(a)-sqr(b) are special-cased:
    both arguments are packed into one register, squared with a single packed
    multiply and combined with a horizontal add/subtract. }
  procedure tx86addnode.second_addfloatsse;
    var
      list : TAsmList;
      cgop : topcg;
      squares : boolean;
      arg : tnode;

    { force floating point reg. location to be written to memory,
      we don't force it to mm register because writing to memory
      allows probably shorter code because there is no direct fpu->mm register
      copy instruction }
    procedure spill_x87_operand(var loc : tlocation; def : tdef);
      begin
        if loc.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,loc,def);
      end;

    begin
      squares:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if squares then
        begin
          { replace each sqr() node by its argument; the squaring is done
            below with a packed multiply }
          arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=arg;

          arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=arg;
        end;

      pass_left_right;
      list:=current_asmdata.CurrAsmList;
      check_left_and_right_fpureg(false);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc = LOC_FPUREGISTER) and
             (right.location.loc = LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(list,right.location,right.resultdef,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack both doubles into one register, square them, combine }
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              list.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  list.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  list.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              list.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both singles are now in the low and in the high quadword }
              list.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  list.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  list.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          { start from a copy of right and apply left to it }
          location.register:=cg.getmmregister(list,location.size);
          cg.a_loadmm_reg_reg(list,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          spill_x87_operand(left.location,left.resultdef);
          cg.a_opmm_loc_reg(list,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { load left into the result register, then apply right to it }
          spill_x87_operand(left.location,left.resultdef);
          location.register:=cg.getmmregister(list,location.size);
          cg.a_loadmm_loc_reg(list,location.size,left.location,location.register,mms_movescalar);
          spill_x87_operand(right.location,right.resultdef);
          cg.a_opmm_loc_reg(list,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates code for a scalar floating point add/sub/mul/div node using the
    three operand mm-register code generator calls. The node's location is
    reset to LOC_MMREGISTER before any of the operation variants is emitted. }
  procedure tx86addnode.second_addfloatavx;
    var
      mmop : topcg;
      horiz : tasmop;
      fold_squares : boolean;
      inner : tnode;

    { True if n is a real constant node whose value passes is_number_float
      and equals 2. }
    function is_const_two(n : tnode) : boolean;
      begin
        is_const_two:=is_constrealnode(n) and
          is_number_float(trealconstnode(n).value_real) and
          (trealconstnode(n).value_real=2);
      end;

    { Computes operand*2 as operand+operand into a newly allocated result
      register. }
    procedure emit_doubling(operand : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,operand.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,operand.location,operand.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          operand.location.register,
          operand.location.register,
          location.register,
          mms_movescalar);
      end;

    { Writes an operand that lives in an x87 register to memory. It is not
      forced into an mm register because there is no direct fpu->mm register
      copy instruction, so going through memory probably allows shorter code. }
    procedure spill_fpu_operand(operand : tnode);
      begin
        if operand.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,operand.location,operand.resultdef);
      end;

    begin
      fold_squares:=false;
{$ifdef dummy}
      { sqr(a) +/- sqr(b): strip both sqr nodes and remember that the squares
        have to be computed together with the add/sub below }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fold_squares:=true;

          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { can't use swapleftright if both are on the fpu stack, since then
            both are "R_ST" -> nothing would change -> manually switch }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { map the node type onto the generic code generator operation }
      case nodetype of
        addn :
          mmop:=OP_ADD;
        muln :
          mmop:=OP_MUL;
        subn :
          mmop:=OP_SUB;
        slashn :
          mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fold_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built in place in left's register }
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack both operands into one register, square both lanes and
                combine them horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz:=A_HADDPD;
                subn :
                  horiz:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz,S_NO,location.register,location.register));
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz:=A_HADDPS;
                subn :
                  horiz:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz,S_NO,location.register,location.register));
            end
        end
      { left*2 ? }
      else if (nodetype=muln) and is_const_two(right) then
        emit_doubling(left)
      { right*2 ? }
      else if (nodetype=muln) and is_const_two(left) then
        emit_doubling(right)
      { right is already in an mm register: it can only serve as the register
        source operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (mmop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_fpu_operand(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,mmop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_fpu_operand(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,mmop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Emits a scalar SSE comparison (COMISS for single, COMISD for double) of
    the two operands and describes the result as a LOC_FLAGS location. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;

    { Emits "cmpop src,dst" where dst is expected to be in an mm register and
      src may be in memory or in an mm register. errcode is passed to
      internalerror if src ends up in any other kind of location. }
    procedure emit_compare(src,dst : tnode;errcode : longint);
      begin
        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm
          register copy instruction }
        if src.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,src.location,src.resultdef);

        case src.location.loc of
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,src.location.register,dst.location.register));
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,src.location.reference,dst.location.register));
            end;
          else
            internalerror(errcode);
        end;
      end;

    begin
      { the opcode is selected from the type of the left operand }
      if is_single(left.resultdef) then
        cmpop:=A_COMISS
      else if is_double(left.resultdef) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left goes into an mm register, right is the
            memory/register source operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          emit_compare(right,left,200402223);
        end
      else
        begin
          { right is already in an mm register: compare with the operands
            exchanged and record that by toggling nf_swapped }
          emit_compare(left,right,200402221);
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Emits a scalar AVX comparison (VCOMISS for single, VCOMISD for double) of
    the two operands and describes the result as a LOC_FLAGS location. }
  procedure tx86addnode.second_cmpfloatavx;
    var
      cmpop : tasmop;

    { Emits "cmpop src,dst" where dst is expected to be in an mm register and
      src may be in memory or in an mm register. errcode is passed to
      internalerror if src ends up in any other kind of location. }
    procedure emit_compare(src,dst : tnode;errcode : longint);
      begin
        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm
          register copy instruction }
        if src.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,src.location,src.resultdef);

        case src.location.loc of
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,src.location.register,dst.location.register));
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,src.location.reference,dst.location.register));
            end;
          else
            internalerror(errcode);
        end;
      end;

    begin
      { the opcode is selected from the type of the left operand }
      if is_single(left.resultdef) then
        cmpop:=A_VCOMISS
      else if is_double(left.resultdef) then
        cmpop:=A_VCOMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left goes into an mm register, right is the
            memory/register source operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right,left,200402223);
        end
      else
        begin
          { right is already in an mm register: compare with the operands
            exchanged and record that by toggling nf_swapped }
          emit_compare(left,right,200402221);
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for an add/sub/mul/div node whose operands are vectors
    that fit into an mm register. The result is left in a LOC_MMREGISTER
    location that reuses the register of one of the operands. Vectors that
    do not fit into an mm register are not supported yet and raise an
    internal error. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is derived from the element type of the
            vector. left.resultdef is the array definition itself, so it must
            be accessed through tarraydef(...).elementdef in both branches;
            casting the array def directly to tfloatdef would read floattype
            from the wrong kind of definition. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div node. Types handled
    by the vector fpu are delegated to the AVX or SSE variant; everything else
    is computed on the x87 stack and the result is left in ST. }
  procedure tx86addnode.second_addfloat;
    var
      x87op : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;

      { pick the popping x87 instruction for the node type }
      case nodetype of
        addn :
          x87op:=A_FADDP;
        muln :
          x87op:=A_FMULP;
        subn :
          x87op:=A_FSUBP;
        slashn :
          x87op:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { if the tree nodes were swapped, the non-commutative operations need
        their reverse operator }
      if nf_swapped in flags then
        case nodetype of
          slashn :
            x87op:=A_FDIVRP;
          subn :
            x87op:=A_FSUBRP;
          else
            ;
        end;

      emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
      { the popping instruction consumed one stack entry }
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison. If either operand type is
    handled by the vector fpu, the AVX or SSE variant is used. Otherwise both
    operands are compared on the x87 stack, both are popped, and the result is
    described as a LOC_FLAGS location. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      swtemp : treference;
{$endif i8086}

    { both compared operands are removed from the x87 stack }
    procedure pop_both_operands;
      begin
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
      end;

    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          if not UseAVX then
            second_cmpfloatsse
          else
            second_cmpfloatavx;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands, then transfer the fpu status word
            into the cpu flags through AX and SAHF }
          emit_none(A_FCOMPP,S_NO);
          pop_both_operands;
{$ifdef i8086}
          if current_settings.cputype<cpu_286 then
            begin
              { below cpu_286 the status word is stored to a 2 byte temp and
                loaded into AX from there instead of using FNSTSW AX
                (presumably because that form is not available there) }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,swtemp);
              emit_ref(A_FNSTSW,S_NO,swtemp);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_ref_reg(A_MOV,S_W,swtemp,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,swtemp);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so the second one is dropped
            explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          pop_both_operands;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  1172. {*****************************************************************************
  1173. Add64bit
  1174. *****************************************************************************}
  { 64 bit add: when cpu64bitalu is defined this is handled by the ordinal
    add code; otherwise this implementation must not be reached and raises an
    internal error. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit compare: when cpu64bitalu is defined this is handled by the
    ordinal compare code; otherwise this implementation must not be reached
    and raises an internal error. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1193. {*****************************************************************************
  1194. AddOrdinal
  1195. *****************************************************************************}
  { Generates a CMP for two ordinal operands and describes the result as a
    LOC_FLAGS location. The comparison is treated as unsigned as soon as one
    of the operand types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      { signed only if both operand types are signed }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      { the type of the left operand determines the operation size }
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are no longer needed once the flags are set }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  begin
    { unit initialization: install tx86addnode in caddnode (presumably the
      class reference used elsewhere to create add nodes - confirm against
      the unit declaring caddnode) }
    caddnode:=tx86addnode;
  end.