nx86add.pas 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add-node code generator shared by the x86 family back ends
      ("common code generation for add nodes on the i386 and x86"). }
    tx86addnode = class(tcgaddnode)
    protected
      { Map the node's comparison type (and its swapped state) to CPU flags. }
      function getresflags(unsigned : boolean) : tresflags;
      function getfpuresflags : tresflags;

      { Operand placement helpers. }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      procedure force_left_and_right_fpureg;
      procedure prepare_x87_locations(out refnode: tnode);

      { Instruction emission helpers. }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

      { Floating point / vector workers. }
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  53. implementation
  54. uses
  55. globtype,globals,systems,
  56. verbose,cutils,
  57. cpuinfo,
  58. aasmbase,aasmtai,aasmdata,aasmcpu,
  59. symconst,symdef,
  60. cgobj,hlcgobj,cgx86,cga,cgutils,
  61. paramgr,tgobj,ncgutil,
  62. ncon,nset,ninl,
  63. defutil;
  64. {*****************************************************************************
  65. Helpers
  66. *****************************************************************************}
  { Emits the two-operand integer instruction 'op' for this node.

    On entry left.location is expected to be a LOC_REGISTER; on exit
    left.location.register holds the result (left/right locations may have
    been swapped and nf_swapped toggled to get there).

    op         : x86 instruction to emit
    opsize     : operand size; converted with TCGSize2Opsize for the emitters
    unsigned   : selects the "no overflow" condition of the overflow check
                 (F_AE when true, F_NO when false)
    extra_not  : one operand is inverted with NOT before the operation. In
                 the non-register, non-swapped path the operation emitted
                 afterwards is always AND, independent of 'op'.
    mboverflow : the operation may overflow; when set and overflow checking
                 is enabled, a conditional call to FPC_OVERFLOW is appended }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for commutative operations the operand roles are exchanged
                before emitting, so the result lands in the former right register }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute right-left in a scratch register, then move the
                result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  if (target_info.cpu=cpu_i386) then
                    emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
                  else
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                { add 1 -> inc; gated by UseIncDec exactly like the dec case
                  below, otherwise the generic add $1,reg path is taken }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
                else
                  { sub 1 -> dec }
                  if (op=A_SUB) and
                     (right.location.loc=LOC_CONSTANT) and
                     (right.location.value=1) and
                     not(cs_check_overflow in current_settings.localswitches) and
                     UseIncDec then
                    begin
                      emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                    end
                  else
                    { multiplication by 2^power -> shift left }
                    if (op=A_IMUL) and
                       (right.location.loc=LOC_CONSTANT) and
                       (ispowerof2(int64(right.location.value),power)) and
                       not(cs_check_overflow in current_settings.localswitches) then
                      begin
                        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                      end
                    { multiplication by 3, 5 or 9 -> lea (reg,reg*2/4/8) into a new register }
                    else if (op=A_IMUL) and
                       (right.location.loc=LOC_CONSTANT) and
                       (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                       (power in [1..3]) and
                       not(cs_check_overflow in current_settings.localswitches) then
                      begin
                        reference_reset_base(href,left.location.register,0,0);
                        href.index:=left.location.register;
                        href.scalefactor:=int64(right.location.value)-1;
                        left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                      end
                    else
                      begin
                        if extra_not then
                          begin
                            { load right into a scratch register, invert it
                              and AND it into the left register }
                            r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                            emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                            emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                          end
                        else
                          begin
                            emit_op_right_left(op,opsize);
                          end;
                      end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { jump over the FPC_OVERFLOW call when no overflow occurred }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure the left operand ends up in a register and that both
    non-constant operands have the (sign-agnostic) size of 'opsize'.

    opdef  : definition the operands are converted to when forced into a register
    opsize : operand size of the operation
    noswap : when true, left and right may not be exchanged even if right
             already lives in a register }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      asmlist : TAsmList;
      is_comparison : boolean;
    begin
      asmlist:=current_asmdata.CurrAsmList;

      if left.location.loc<>LOC_REGISTER then
        begin
          if (right.location.loc=LOC_REGISTER) and not(noswap) then
            begin
              { right is already a register: just exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { a comparison does not modify its operand, so a constant
                register may be reused in that case }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(asmlist,left.location,left.resultdef,opdef,is_comparison);
            end;
        end;

      { size fix-up, right operand first, then left }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[opsize]<>tcgsize2unsigned[right.location.size]) then
        hlcg.location_force_reg(asmlist,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[opsize]<>tcgsize2unsigned[left.location.size]) then
        hlcg.location_force_reg(asmlist,left.location,left.resultdef,opdef,false);
    end;
  { Loads whichever operands are not yet LOC_FPUREGISTER onto the FPU stack
    (right first, then left). If left was already on the stack, nf_swapped
    is toggled instead, because the operands are then in the wrong order. }
  procedure tx86addnode.force_left_and_right_fpureg;
    var
      left_on_stack,
      right_on_stack : boolean;
    begin
      { sample both states up front: loading right does not touch left.location }
      left_on_stack:=(left.location.loc=LOC_FPUREGISTER);
      right_on_stack:=(right.location.loc=LOC_FPUREGISTER);

      if not right_on_stack then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left_on_stack then
        { left was on the stack => operands are in the wrong order }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Prepares both operands for an x87 instruction.

    At most one operand is left in memory: if one side is a
    LOC_REFERENCE/LOC_CREFERENCE of size OS_F32 or OS_F64 it is handed back
    in 'refnode'; every other operand is loaded onto the FPU stack and
    'refnode' is nil. nf_swapped is toggled as required to keep track of
    the resulting operand order. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      asmlist : TAsmList;
      on_stack_count : integer;
    begin
      asmlist:=current_asmdata.CurrAsmList;
      refnode:=nil;

      { how many operands already live on the FPU stack? }
      on_stack_count:=0;
      if left.location.loc=LOC_FPUREGISTER then
        inc(on_stack_count);
      if right.location.loc=LOC_FPUREGISTER then
        inc(on_stack_count);

      case on_stack_count of
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable directly as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { unsuitable size: load it onto the stack as well }
                hlcg.location_force_fpureg(asmlist,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        0:
          begin
            { right always goes onto the stack, left stays in memory if possible }
            hlcg.location_force_fpureg(asmlist,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(asmlist,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
      else
        InternalError(2013090802);
      end;
    end;
  { Emits "op right,left": the right operand (register, memory reference
    or constant) is the source, left.location.register the destination.
    Subset register/reference locations on the right are first forced into
    a register. Any other right location raises an internal error. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      asmlist : TAsmList;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
    begin
      asmlist:=current_asmdata.CurrAsmList;

      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(asmlist,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly,
              larger ones go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(asmlist,opsize);
                cg.a_load_const_reg(asmlist,opsize,right.location.value,tmpreg);
                asmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              asmlist.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(asmlist,right.location.reference);
            asmlist.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          asmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
      else
        internalerror(200203232);
      end;
    end;
  { Returns the CPU condition that represents this node's comparison.

    unsigned : use the unsigned conditions (above/below) instead of the
               signed ones (greater/less)

    Equality tests map to F_E/F_NE. For the ordering comparisons the
    condition is mirrored when nf_swapped is set. Any other node type
    raises an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    const
      { condition tables, indexed by [unsigned, operands swapped] }
      lt_cond  : array[boolean,boolean] of tresflags = ((F_L ,F_G ),(F_B ,F_A ));
      lte_cond : array[boolean,boolean] of tresflags = ((F_LE,F_GE),(F_BE,F_AE));
      gt_cond  : array[boolean,boolean] of tresflags = ((F_G ,F_L ),(F_A ,F_B ));
      gte_cond : array[boolean,boolean] of tresflags = ((F_GE,F_LE),(F_AE,F_BE));
    var
      is_swapped : boolean;
    begin
      is_swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn :
          result:=lt_cond[unsigned,is_swapped];
        lten :
          result:=lte_cond[unsigned,is_swapped];
        gtn :
          result:=gt_cond[unsigned,is_swapped];
        gten :
          result:=gte_cond[unsigned,is_swapped];
      else
        begin
          { not a comparison node: keep a distinct error number per variant }
          if unsigned then
            begin
              if is_swapped then
                internalerror(2013120107)
              else
                internalerror(2013120108);
            end
          else
            begin
              if is_swapped then
                internalerror(2013120105)
              else
                internalerror(2013120106);
            end;
        end;
      end;
    end;
  { Returns the floating point condition (F_Fxx) that represents this
    node's comparison. Ordering comparisons are mirrored when nf_swapped
    is set; any non-comparison node type raises an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      is_swapped : boolean;
    begin
      is_swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          if is_swapped then
            result:=F_FA
          else
            result:=F_FB;
        lten :
          if is_swapped then
            result:=F_FAE
          else
            result:=F_FBE;
        gtn :
          if is_swapped then
            result:=F_FB
          else
            result:=F_FA;
        gten :
          if is_swapped then
            result:=F_FBE
          else
            result:=F_FAE;
      else
        begin
          if is_swapped then
            internalerror(2014031402)
          else
            internalerror(2014031403);
        end;
      end;
    end;
  406. {*****************************************************************************
  407. AddSmallSet
  408. *****************************************************************************}
  {$ifndef i8086}
  { Generates code for small-set arithmetic: union (addn/orn), adding a
    single element (addn with a setelementn, via BTS), difference (subn),
    intersection (muln/andn) and symmetric difference (symdifn/xorn).
    The result is left in a register described by 'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      asmlist : TAsmList;
      base_of_set : aint;
      work_def : tdef;
      work_size : TCGSize;
      instr : TAsmOp;
      is_swapped,
      invert_operand,
      keep_order,
      full_set_minus : boolean;
    begin
      pass_left_right;
      asmlist:=current_asmdata.CurrAsmList;

      keep_order:=false;
      invert_operand:=false;
      full_set_minus:=false;

      work_def:=resultdef;
      work_size:=int_cgsize(work_def.size);

      { the set base comes from whichever side is the set }
      if left.resultdef.typ<>setdef then
        base_of_set:=tsetdef(right.resultdef).setbase
      else
        base_of_set:=tsetdef(left.resultdef).setbase;

      case nodetype of
        symdifn,
        xorn :
          instr:=A_XOR;
        muln,
        andn :
          instr:=A_AND;
        orn :
          instr:=A_OR;
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              instr:=A_OR
            else
              begin
                { adding a single element: no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, work on 32 bit instead }
                if work_size=OS_8 then
                  begin
                    work_size:=OS_32;
                    work_def:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(asmlist,left.location,left.resultdef,work_def,false);
                hlcg.location_force_reg(asmlist,right.location,right.resultdef,work_def,true);
                register_maybe_adjust_setbase(asmlist,right.location,base_of_set);
                instr:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and (not b) }
            instr:=A_AND;
            is_swapped:=nf_swapped in flags;
            if is_swapped then
              begin
                { minuend is in 'right', subtrahend in 'left' }
                full_set_minus:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                { minuend is in 'left', subtrahend in 'right' }
                full_set_minus:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
      else
        internalerror(2003042215);
      end;

      if not full_set_minus then
        begin
          { left must be a register }
          left_must_be_reg(work_def,work_size,keep_order);
          emit_generic_code(instr,work_size,true,invert_operand,false);
          location_freetemp(asmlist,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(asmlist,right.location,right.resultdef,work_def,false);
          emit_reg(A_NOT,TCGSize2Opsize[work_size],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if work_size<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(asmlist,location,work_def,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  {$endif not i8086}
  { Generates code for small-set comparisons (=, <>, <=, >=). The result
    is returned in the CPU flags (LOC_FLAGS).

    Subset tests are reduced to an equality test: the operands are
    arranged as needed, ANDed together, and the result is compared against
    the remaining operand. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmp_def : tdef;
      cmp_size : TCGSize;
    begin
      pass_left_right;
      { taken before any operand swap below }
      cmp_def:=left.resultdef;
      cmp_size:=int_cgsize(cmp_def.size);

      if nodetype in [lten,gten] then
        begin
          { swap for "<=" on unswapped operands and for ">=" on swapped ones }
          if (nodetype=lten) xor (nf_swapped in flags) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmp_def,false);
          emit_op_right_left(A_AND,cmp_size);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register; every supported case ends in a CMP }
      left_must_be_reg(cmp_def,cmp_size,false);
      emit_generic_code(A_CMP,cmp_size,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  548. {*****************************************************************************
  549. AddMMX
  550. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for an MMX vector operation (addn, muln, subn, xorn,
    orn, andn). The instruction is selected from the element type of the
    left operand and, for addn/subn, from the cs_mmx_saturation switch.
    The result is placed in an MMX register (location.register).

    Raises internalerror(2003042214) for unsupported node types and
    internalerror(201408201) when no instruction exists for the element
    type (op is still A_NOP after selection). }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word-sized saturating subtract; the byte variant
                      PSUBSB would treat each word as two separate bytes }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { no instruction available for this operation/element type combination }
      if op = A_NOP then
        internalerror(201408201);

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it; the fresh register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand that is read from memory here;
                    left is already an MMX register at this point }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in the former right register: exchange the
                locations so that left describes the result again }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { cmpop is never set in this routine, so left is not released here }
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  {$endif SUPPORT_MMX}
  724. {*****************************************************************************
  725. AddFloat
  726. *****************************************************************************}
  { Generates code for a floating point add/sub/mul/div node whose result is
    placed in an mm (SSE) register.

    If the selected fpu type is at least SSE3 and the node has the shape
    sqr(a)+sqr(b) or sqr(a)-sqr(b), both squares are computed by one packed
    multiply and combined by a horizontal add/sub.  In all other cases a
    scalar operation is emitted; operands that live on the x87 stack are
    written to memory first. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;
      horiz_op : tasmop;
      fuse_squares,
      both_on_x87 : boolean;
      inner : tnode;

    { Forces n to memory when its value currently lives in an fpu register.
      It is not forced into an mm register because writing to memory allows
      probably shorter code: there is no direct fpu->mm register copy
      instruction. }
    procedure spill_x87_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    begin
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { replace each sqr() node by its argument and dispose of the
            now empty inline node; the squaring is done by the packed
            multiply emitted further down }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used if both operands are on the fpu
            stack: both are "R_ST", so nothing would change -> exchange
            them manually }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in the register holding left }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { put right into the upper half next to left, then square both at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPD;
                subn:
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave left and right in the low 64 bits }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values by mirroring
                the low 64 bits into them }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { left and right are now in bits 0..31 and 32..63 (and again
                in bits 64..127); square them all at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPS;
                subn:
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { combine the two squares horizontally }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          spill_x87_operand(left);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { result := left }
          spill_x87_operand(left);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { result := result <op> right }
          spill_x87_operand(right);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { AVX flavour of the floating point add/sub/mul/div code generator; the
    result is placed in a freshly allocated mm register and the three
    operand forms of the code generator (a_opmm_*_reg_reg) are used.

    The sqr(a)+/-sqr(b) detection is only compiled when the symbol "dummy"
    is defined; without it fuse_squares stays false and the corresponding
    branch below is never taken.  A multiplication by the real constant 2
    is turned into an addition of the other operand with itself. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      horiz_op : tasmop;
      fuse_squares,
      both_on_x87 : boolean;
      inner : tnode;

    { True if n is a real constant node holding the (valid) number 2. }
    function is_const_two(n : tnode) : boolean;
      begin
        is_const_two:=is_constrealnode(n) and
          is_number_float(trealconstnode(n).value_real) and
          (trealconstnode(n).value_real=2);
      end;

    { Computes n+n into a newly allocated result register. }
    procedure emit_doubling(n : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,n.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,n.location,n.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          n.location.register,
          n.location.register,
          location.register,
          mms_movescalar);
      end;

    { Forces n to memory when its value currently lives in an fpu register.
      It is not forced into an mm register because writing to memory allows
      probably shorter code: there is no direct fpu->mm register copy
      instruction. }
    procedure spill_x87_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;
          { replace each sqr() node by its argument }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used if both operands are on the fpu
            stack: both are "R_ST", so nothing would change -> exchange
            them manually }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in the register holding left }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { put right into the upper half next to left, then square both at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPD;
                subn:
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave left and right in the low 64 bits }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values by mirroring
                the low 64 bits into them }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { left and right are now in bits 0..31 and 32..63 (and again
                in bits 64..127); square them all at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPS;
                subn:
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { combine the two squares horizontally }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { left*2 ? }
      else if (nodetype=muln) and is_const_two(right) then
        emit_doubling(left)
      { right*2 ? }
      else if (nodetype=muln) and is_const_two(left) then
        emit_doubling(right)
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_x87_operand(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_x87_operand(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Compares two single or double operands with (V)COMISS/(V)COMISD and
    returns the result in the cpu flags (LOC_FLAGS).

    Any other operand type triggers internalerror 200402222.  When right is
    already in an mm register it is used as the register operand of the
    compare and nf_swapped is toggled; otherwise left is forced into an mm
    register and compared against right. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { indexed by UseAVX }
      cmp_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      cmp_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
    var
      cmpop : tasmop;

    { Emits "cmpop src,dst".  dst must be in an mm register, src may be an
      mm register or a memory reference; any other src location raises
      internalerror(errnum). }
    procedure emit_compare(src,dst : tnode;errnum : longint);
      begin
        case src.location.loc of
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,src.location.register,dst.location.register));
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,src.location.reference,dst.location.register));
            end;
          else
            internalerror(errnum);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        cmpop:=cmp_single[UseAVX]
      else if is_double(left.resultdef) then
        cmpop:=cmp_double[UseAVX]
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is the register operand here, so the operands are
            compared in exchanged order: record that in nf_swapped }
          emit_compare(left,right,200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right,left,200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for an add/sub/mul/div on vector operands that fit into
    an mm register; the result is returned in an mm register.

    Node types other than addn/muln/subn/slashn raise internalerror
    200610071, operands that do not fit into an mm register raise
    internalerror 200610072 (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is the size of one vector element, so it has
            to be taken from the element definition of the array.  The
            commutative branch used to cast left.resultdef itself to
            tfloatdef, which reads the float type from a definition that is
            not a float definition; both branches now share the same size.
            NOTE(review): assumes fits_in_mm_register only accepts array
            defs with float elements, as the non-commutative branch already
            did - confirm. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { accumulate into the register that already holds right }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { accumulate into the register holding left }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Entry point for floating point add/sub/mul/div.

    If the result type is handled by the vector fpu the work is delegated to
    second_addfloatavx or second_addfloatsse.  Otherwise an x87 instruction
    is emitted and the result is left in ST. }
  procedure tx86addnode.second_addfloat;
    const
      { Last index: false = both operands on the fpu stack (popping form),
                    true  = one operand is taken from memory.
        First index of the 2D tables: true = operands are swapped, so the
        reversed instruction has to be used. }
      add_ops : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ops : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_ops : array[boolean,boolean] of TAsmOp =
        ((A_FSUBP,A_FSUB),(A_FSUBRP,A_FSUBR));
      div_ops : array[boolean,boolean] of TAsmOp =
        ((A_FDIVP,A_FDIV),(A_FDIVRP,A_FDIVR));
    var
      x87op : TAsmOp;
      memnode : tnode;
      from_mem : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      { memnode is set by prepare_x87_locations; when assigned, its location
        is used as memory operand of the instruction below }
      prepare_x87_locations(memnode);
      from_mem:=assigned(memnode);

      case nodetype of
        addn :
          x87op:=add_ops[from_mem];
        muln :
          x87op:=mul_ops[from_mem];
        subn :
          x87op:=sub_ops[nf_swapped in flags,from_mem];
        slashn :
          x87op:=div_ops[nf_swapped in flags,from_mem];
        else
          internalerror(2003042214);
      end;

      if not from_mem then
        begin
          { popping form: one entry disappears from the fpu stack }
          emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(x87op,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Compares two floating point operands and returns the result in the cpu
    flags (LOC_FLAGS).

    Operands handled by the vector fpu are delegated to
    second_cmpfloatvector.  Otherwise both operands are forced onto the x87
    stack and compared either with FCOMPP plus a transfer of the fpu status
    word into the cpu flags (non x86_64 targets with a cpu type below
    Pentium2) or with FCOMIP. }
  procedure tx86addnode.second_cmpfloat;

{$ifndef x86_64}
    { FCOMPP compares and pops both operands; afterwards the fpu status
      word is moved into the cpu flags through AH/SAHF. }
    procedure compare_via_status_word;
{$ifdef i8086}
      var
        swtemp: treference;
{$endif i8086}
      begin
        emit_none(A_FCOMPP,S_NO);
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
        { load fpu flags }
{$ifdef i8086}
        if current_settings.cputype < cpu_286 then
          begin
            { store the status word in a 2 byte temp and fetch its high
              byte into AH.
              NOTE(review): presumably because the "FNSTSW AX" form is
              not available on these cpus - confirm }
            tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,swtemp);
            emit_ref(A_FSTSW,S_NO,swtemp);
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            inc(swtemp.offset);
            emit_ref_reg(A_MOV,S_B,swtemp,NR_AH);
            dec(swtemp.offset);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            tg.ungettemp(current_asmdata.CurrAsmList,swtemp);
          end
        else
{$endif i8086}
          begin
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            emit_reg(A_FNSTSW,S_NO,NR_AX);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          end;
      end;
{$endif x86_64}

    { FCOMIP sets the cpu flags directly. }
    procedure compare_via_fcomip;
      begin
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
        { fcomip pops only one fpu register }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
      end;

    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        compare_via_status_word
      else
{$endif x86_64}
        compare_via_fcomip;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1207. {*****************************************************************************
  1208. Add64bit
  1209. *****************************************************************************}
  { 64 bit add-type operations: with a 64 bit alu they are ordinary ordinal
    operations; without one this generic implementation must not be
    reached and raises internalerror 200402042. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparisons: with a 64 bit alu they are ordinary ordinal
    comparisons; without one this generic implementation must not be
    reached and raises internalerror 200402043. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1228. {*****************************************************************************
  1229. AddOrdinal
  1230. *****************************************************************************}
  { Compares two ordinal operands with CMP and returns the result in the
    cpu flags (LOC_FLAGS).

    A constant right operand is compared directly against a left operand
    in memory; in every other case left is brought into a register first
    and the generic emitter is used.  The comparison counts as unsigned as
    soon as one of the operand types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned,
      imm_against_mem : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      imm_against_mem:=(right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      { a 64 bit compare is only done this way if the constant lies within
        the longint range }
      imm_against_mem:=imm_against_mem and
        ((not (cmpsize in [OS_64,OS_S64])) or
         ((right.location.value>=low(longint)) and (right.location.value<=high(longint))));
{$endif x86_64}

      if imm_against_mem then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  1264. begin
  1265. caddnode:=tx86addnode; { unit initialization: make caddnode refer to the x86 add node class (NOTE(review): presumably the class reference used to create add nodes - confirm) }
  1266. end.