nx86add.pas 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Binary operator node shared by the i386 and x86 code generators.
    Derives from the generic code generator node tcgaddnode and overrides
    its second_* methods. }
  tx86addnode = class(tcgaddnode)
  protected
    { helpers used by the integer and small-set code paths }
    function getresflags(unsigned : boolean) : tresflags;
    procedure left_must_be_reg(opdef : tdef; opsize : TCGSize; noswap : boolean);
    procedure emit_op_right_left(op : TAsmOp; opsize : TCgSize);
    procedure emit_generic_code(op : TAsmOp; opsize : TCgSize; unsigned, extra_not, mboverflow : boolean);
    { helpers that move floating point operands onto the FPU stack }
    procedure force_left_and_right_fpureg;
    procedure prepare_x87_locations(out refnode : tnode);
    { remaining floating point helpers }
    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    procedure second_addfloat; override;
{$ifndef i8086}
    procedure second_addsmallset; override;
{$endif not i8086}
    procedure second_add64bit; override;
    procedure second_cmpfloat; override;
    procedure second_cmpsmallset; override;
    procedure second_cmp64bit; override;
    procedure second_cmpordinal; override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx; override;
{$endif SUPPORT_MMX}
    procedure second_opvector; override;
  end;
  52. implementation
  53. uses
  54. globtype,globals,
  55. verbose,cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmdata,aasmcpu,
  58. symconst,symdef,
  59. cgobj,hlcgobj,cgx86,cga,cgutils,
  60. paramgr,tgobj,ncgutil,
  61. ncon,nset,ninl,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  { Emits the instruction(s) for a two-operand integer or small-set operation.

    On entry left.location is expected to be a LOC_REGISTER. For every
    operation that produces a value, the result ends up in
    left.location.register (the locations are swapped, or the register is
    replaced, where necessary).

    op         - x86 opcode to emit
    opsize     - operand size of the operation
    unsigned   - selects the flag tested by the overflow check
                 (F_AE when true, F_NO when false)
    extra_not  - complement the subtracted operand before applying op
    mboverflow - the operation may overflow: emit a call to FPC_OVERFLOW
                 when cs_check_overflow is active }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for commutative operations the operand roles are exchanged
                before the instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute the result in a scratch register loaded from right,
                then copy it back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { (in)equality with zero: TEST reg,reg }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   { same guard as the DEC case below: only use INC/DEC when
                     the target configuration allows it }
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { multiplication by 2^power: shift left }
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                      (right.location.loc=LOC_CONSTANT) and
                      (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                      (power in [1..3]) and
                      not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 3, 5 or 9: LEA new,[reg+reg*(value-1)] }
                  reference_reset_base(href,left.location.register,0,0);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right in a scratch register and
                        AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              { skip the FPC_OVERFLOW call when the relevant flag reports
                no overflow }
              current_asmdata.getjumplabel(hl4);
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure the left operand is held in a register.

    opdef/opsize - type and size the operation is carried out in
    noswap       - when true the operands may not be exchanged, so left is
                   always loaded instead of taking over right's register

    Afterwards every non-constant operand whose size (ignoring signedness)
    differs from opsize is forced into a register of type opdef. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      wanted_size : TCGSize;
      is_comparison : boolean;
    begin
      wanted_size:=tcgsize2unsigned[opsize];

      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not modify its operand, so a constant
                register may be reused for it }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
            end
          else
            begin
              { right already is a register: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      if right.location.loc<>LOC_CONSTANT then
        if tcgsize2unsigned[right.location.size]<>wanted_size then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if left.location.loc<>LOC_CONSTANT then
        if tcgsize2unsigned[left.location.size]<>wanted_size then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands onto the FPU stack.  An operand that is not yet a
    LOC_FPUREGISTER is loaded (right first, then left).  If left was already
    on the stack, nf_swapped is toggled instead, because the operands then
    sit on the stack in the opposite order. }
  procedure tx86addnode.force_left_and_right_fpureg;
    var
      left_on_stack,
      right_on_stack : boolean;
    begin
      { sample both locations before any load is emitted }
      left_on_stack:=(left.location.loc=LOC_FPUREGISTER);
      right_on_stack:=(right.location.loc=LOC_FPUREGISTER);

      if not right_on_stack then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left_on_stack then
        { left was pushed before right => the operands are in the wrong order }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Gets both operands ready for an x87 instruction.

    At most one operand stays in memory: a LOC_REFERENCE/LOC_CREFERENCE of
    size OS_F32 or OS_F64 is handed back through 'refnode'.  Every other
    operand is loaded onto the FPU stack; 'refnode' is nil when both operands
    ended up there.  nf_swapped is toggled whenever the resulting operand
    order is reversed. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      left_on_stack,
      right_on_stack : boolean;
      stack_operands : longint;
    begin
      refnode:=nil;
      left_on_stack:=(left.location.loc=LOC_FPUREGISTER);
      right_on_stack:=(right.location.loc=LOC_FPUREGISTER);
      stack_operands:=ord(left_on_stack)+ord(right_on_stack);

      case stack_operands of
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left_on_stack then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand; swap if it is the left side }
                if not left_on_stack then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { size not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if left_on_stack then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        0:
          begin
            { nothing loaded yet: right goes onto the stack first }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if left.location.size in [OS_F32,OS_F64] then
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end
            else
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          end;
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right,left" where left.location.register is the destination.
    The right operand may be a register, a memory reference or a constant;
    subset locations are loaded into a register first.  Any other location
    raises internal error 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      list : TAsmList;
      instr_size : topsize;
  {$ifdef x86_64}
      tmpreg : tregister;
  {$endif x86_64}
    begin
      list:=current_asmdata.CurrAsmList;
      instr_size:=TCGSize2Opsize[opsize];

      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(list,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
  {$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly, larger
              values go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(list,opsize);
                cg.a_load_const_reg(list,opsize,right.location.value,tmpreg);
                list.concat(taicpu.op_reg_reg(op,instr_size,tmpreg,left.location.register));
              end
            else
  {$endif x86_64}
              list.concat(taicpu.op_const_reg(op,instr_size,right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(list,right.location.reference);
            list.concat(taicpu.op_ref_reg(op,instr_size,right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          list.concat(taicpu.op_reg_reg(op,instr_size,right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Maps the comparison in 'nodetype' to the CPU flag condition holding its
    result.

    unsigned - true selects the above/below conditions, false the
               greater/less ones

    When nf_swapped is set the ordering conditions are mirrored.  A node type
    that is not a comparison raises an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
    begin
      { (in)equality depends neither on signedness nor on operand order }
      if nodetype=equaln then
        begin
          getresflags:=F_E;
          exit;
        end;
      if nodetype=unequaln then
        begin
          getresflags:=F_NE;
          exit;
        end;

      swapped:=nf_swapped in flags;
      if unsigned then
        begin
          if not swapped then
            case nodetype of
              ltn : getresflags:=F_B;
              lten : getresflags:=F_BE;
              gtn : getresflags:=F_A;
              gten : getresflags:=F_AE;
              else
                internalerror(2013120108);
            end
          else
            case nodetype of
              ltn : getresflags:=F_A;
              lten : getresflags:=F_AE;
              gtn : getresflags:=F_B;
              gten : getresflags:=F_BE;
              else
                internalerror(2013120107);
            end;
        end
      else
        begin
          if not swapped then
            case nodetype of
              ltn : getresflags:=F_L;
              lten : getresflags:=F_LE;
              gtn : getresflags:=F_G;
              gten : getresflags:=F_GE;
              else
                internalerror(2013120106);
            end
          else
            case nodetype of
              ltn : getresflags:=F_G;
              lten : getresflags:=F_GE;
              gtn : getresflags:=F_L;
              gten : getresflags:=F_LE;
              else
                internalerror(2013120105);
            end;
        end;
    end;
  373. {*****************************************************************************
  374. AddSmallSet
  375. *****************************************************************************}
  {$ifndef i8086}
  { Generates code for the small-set operators (addn, subn, muln, symdifn,
    xorn, orn, andn).  Adding a single set element uses BTS, set difference
    is an AND with the complemented subtrahend, and "all members minus x"
    is reduced to a plain NOT.  The result is left in 'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      base : aint;
      workdef : tdef;
      worksize : TCGSize;
      asmop : TAsmOp;
      swapped,
      negate_subtrahend,
      keep_order,
      complement_only : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      negate_subtrahend:=false;
      complement_only:=false;
      workdef:=resultdef;
      worksize:=int_cgsize(workdef.size);

      { take the set base from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        base:=tsetdef(right.resultdef).setbase
      else
        base:=tsetdef(left.resultdef).setbase;

      case nodetype of
        symdifn,
        xorn :
          asmop:=A_XOR;
        muln,
        andn :
          asmop:=A_AND;
        orn :
          asmop:=A_OR;
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              asmop:=A_OR
            else
              begin
                { adding a set element }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, widen to 32 bit }
                if worksize=OS_8 then
                  begin
                    worksize:=OS_32;
                    workdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,workdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,base);
                asmop:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            asmop:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { minuend is in right, subtrahend in left }
                if (right.location.loc=LOC_CONSTANT) and (right.location.value=-1) then
                  complement_only:=true;
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  negate_subtrahend:=true;
              end
            else
              begin
                { minuend is in left, subtrahend in right }
                if (left.location.loc=LOC_CONSTANT) and (left.location.value=-1) then
                  complement_only:=true;
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  negate_subtrahend:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not complement_only then
        begin
          { left must be a register }
          left_must_be_reg(workdef,worksize,keep_order);
          emit_generic_code(asmop,worksize,true,negate_subtrahend,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[worksize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if worksize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,workdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  {$endif not i8086}
  { Generates code for small-set comparisons (equaln, unequaln, lten, gten).
    The subset tests lten/gten are turned into an AND followed by an
    equality compare.  The result is returned as LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped lten or a swapped gten }
          if (nf_swapped in flags) xor (nodetype=lten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  515. {*****************************************************************************
  516. AddMMX
  517. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for the MMX operators addn, muln, subn, xorn, orn and andn.

    The opcode is selected from the node type, the MMX element type of the
    left operand and, for addn/subn, the cs_mmx_saturation switch.  Left is
    forced into an MMX register (by swapping with right when right already
    is one); right may be an MMX register, an MMX register variable or a
    memory reference.  The result register is stored in location.register. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { no comparison operator is handled below, so cmpop stays false }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { signed saturating subtract on 16 bit lanes is the word
                      form, matching A_PADDSW in the addition case }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that gets loaded from memory here; left is
                    already an MMX register, so the check must be on right }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in right's register: exchange the locations so
                that left holds it }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  {$endif SUPPORT_MMX}
  688. {*****************************************************************************
  689. AddFloat
  690. *****************************************************************************}
  691. procedure tx86addnode.second_addfloatsse;
  692. var
  693. op : topcg;
  694. sqr_sum : boolean;
  695. tmp : tnode;
  696. begin
  697. sqr_sum:=false;
  698. if (current_settings.fputype>=fpu_sse3) and
  699. use_vectorfpu(resultdef) and
  700. (nodetype in [addn,subn]) and
  701. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  702. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  703. begin
  704. sqr_sum:=true;
  705. tmp:=tinlinenode(left).left;
  706. tinlinenode(left).left:=nil;
  707. left.free;
  708. left:=tmp;
  709. tmp:=tinlinenode(right).left;
  710. tinlinenode(right).left:=nil;
  711. right.free;
  712. right:=tmp;
  713. end;
  714. pass_left_right;
  715. { fpu operands are always in reversed order on the stack }
  716. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  717. toggleflag(nf_swapped);
  718. if (nf_swapped in flags) then
  719. { can't use swapleftright if both are on the fpu stack, since then }
  720. { both are "R_ST" -> nothing would change -> manually switch }
  721. if (left.location.loc = LOC_FPUREGISTER) and
  722. (right.location.loc = LOC_FPUREGISTER) then
  723. emit_none(A_FXCH,S_NO)
  724. else
  725. swapleftright;
  726. case nodetype of
  727. addn :
  728. op:=OP_ADD;
  729. muln :
  730. op:=OP_MUL;
  731. subn :
  732. op:=OP_SUB;
  733. slashn :
  734. op:=OP_DIV;
  735. else
  736. internalerror(200312231);
  737. end;
  738. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  739. if sqr_sum then
  740. begin
  741. if nf_swapped in flags then
  742. swapleftright;
  743. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  744. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  745. location:=left.location;
  746. if is_double(resultdef) then
  747. begin
  748. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  749. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  750. case nodetype of
  751. addn:
  752. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  753. subn:
  754. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  755. else
  756. internalerror(201108162);
  757. end;
  758. end
  759. else
  760. begin
  761. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  762. { ensure that bits 64..127 contain valid values }
  763. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  764. { the data is now in bits 0..32 and 64..95 }
  765. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  766. case nodetype of
  767. addn:
  768. begin
  769. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  770. end;
  771. subn:
  772. begin
  773. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  774. end;
  775. else
  776. internalerror(201108163);
  777. end;
  778. end
  779. end
  780. { we can use only right as left operand if the operation is commutative }
  781. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  782. begin
  783. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  784. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
  785. { force floating point reg. location to be written to memory,
  786. we don't force it to mm register because writing to memory
  787. allows probably shorter code because there is no direct fpu->mm register
  788. copy instruction
  789. }
  790. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  791. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  792. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  793. end
  794. else
  795. begin
  796. if nf_swapped in flags then
  797. swapleftright;
  798. { force floating point reg. location to be written to memory,
  799. we don't force it to mm register because writing to memory
  800. allows probably shorter code because there is no direct fpu->mm register
  801. copy instruction
  802. }
  803. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  804. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  805. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  806. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  807. { force floating point reg. location to be written to memory,
  808. we don't force it to mm register because writing to memory
  809. allows probably shorter code because there is no direct fpu->mm register
  810. copy instruction
  811. }
  812. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  813. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  814. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  815. end;
  816. end;
  { Generates the code for a scalar floating point add/sub/mul/div node using
    the three operand cg.a_opmm_*_reg_reg helpers. The node's location is
    reset to LOC_MMREGISTER with the size of resultdef. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      squares_fused,
      both_on_fpu_stack : boolean;
      inner : tnode;

    { true if n is a real constant node holding the (valid) number 2 }
    function is_const_two(n : tnode) : boolean;
      begin
        is_const_two:=is_constrealnode(n) and
          is_number_float(trealconstnode(n).value_real) and
          (trealconstnode(n).value_real=2);
      end;

    { operand*2 is emitted as operand+operand into a newly allocated
      result register }
    procedure emit_doubling(operand : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,operand.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,operand.location,operand.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          operand.location.register,
          operand.location.register,
          location.register,
          mms_movescalar);
      end;

    begin
      squares_fused:=false;
{$ifdef dummy}
      { disabled: sqr(a)+sqr(b) and sqr(a)-sqr(b) handled with packed
        multiply and horizontal add/sub }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          squares_fused:=true;
          { replace both sqr(x) nodes by their argument x }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;
          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      both_on_fpu_stack:=(left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright does not help if both operands are on the fpu
            stack: both are "R_ST", so nothing would change. Exchange the
            stack entries manually in this case }
          if both_on_fpu_stack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if squares_fused then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      { left*2 ? }
      else if (nodetype=muln) and is_const_two(right) then
        emit_doubling(left)
      { right*2 ? }
      else if (nodetype=muln) and is_const_two(left) then
        emit_doubling(right)
      { right can only take the role of the register operand if the
        operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand in a floating point register is written to memory and
            not forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            allows shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { same reasoning as above: spill an fpu register operand to memory
            instead of forcing it into an mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Compares two single or double operands with (V)COMISS/(V)COMISD and
    leaves the result in the cpu flags (location becomes LOC_FLAGS). }
  procedure tx86addnode.second_cmpfloatvector;
    var
      cmpop : tasmop;
    const
      { indexed by UseAVX }
      comis_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      comis_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);

    { emits "cmpop src,dstreg" where src is either a memory reference or an
      mm register; any other location raises internalerror(errorcode) }
    procedure emit_compare(var src : tlocation; dstreg : tregister; errorcode : longint);
      begin
        case src.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,src.reference,dstreg));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,src.register,dstreg));
          else
            internalerror(errorcode);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        cmpop:=comis_single[UseAVX]
      else if is_double(left.resultdef) then
        cmpop:=comis_double[UseAVX]
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,OS_NO);

      { A direct move fpu->mm register is not possible, so any fpu operand is
        forced to memory. Memory is preferred over an mm register because one
        memory location can be used directly by the compare instruction,
        which yields shorter code }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is already in an mm register: compare with the operands
            exchanged and record this in the node flags }
          emit_compare(left.location,right.location.register,200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right.location,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates the code for an add/sub/mul/div node whose operands are vectors
    (arrays of floats) that fit into an mm register. The result is left in
    the mm register of one of the operands (location is LOC_MMREGISTER).
    Vectors which do not fit into an mm register are not supported yet and
    raise an internal error. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is derived from the element type of the
            vector. left.resultdef is the array def itself, so it must not be
            typecasted to tfloatdef directly (the commutative branch used to
            do this and so read floattype from an array def). }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates the code for a floating point add/sub/mul/div node. If the
    result type is handled by the vector fpu, the work is delegated to the
    SSE or AVX variant; otherwise x87 instructions are emitted and the
    result is left in ST. }
  procedure tx86addnode.second_addfloat;
    const
      { [false]: popping register form used for ST,ST1
        [true] : form used with a memory operand }
      add_ops  : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ops  : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_ops  : array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      rsub_ops : array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      div_ops  : array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      rdiv_ops : array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      fpuop : TAsmOp;
      memoperand : tnode;
      has_memoperand,
      reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memoperand);
      has_memoperand:=assigned(memoperand);
      { read the flag only now, after the x87 locations have been prepared }
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          fpuop:=add_ops[has_memoperand];
        muln :
          fpuop:=mul_ops[has_memoperand];
        subn :
          begin
            if reversed then
              fpuop:=rsub_ops[has_memoperand]
            else
              fpuop:=sub_ops[has_memoperand];
          end;
        slashn :
          begin
            if reversed then
              fpuop:=rdiv_ops[has_memoperand]
            else
              fpuop:=div_ops[has_memoperand];
          end;
        else
          internalerror(2003042214);
      end;

      if not has_memoperand then
        begin
          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(fpuop,tcgsize2opsize[memoperand.location.size],memoperand.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates the code for a floating point comparison; the result is left
    in the cpu flags. Operands handled by the vector fpu are delegated to
    second_cmpfloatvector, everything else is compared on the x87 stack. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword : treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands, then transfer the fpu status
            word into the cpu flags }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word into a 2 byte temp and load its
                high byte into AH }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so the second one is removed
            with an fstp }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  1171. {*****************************************************************************
  1172. Add64bit
  1173. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit alu this is handled by
    second_addordinal; otherwise this method raises an internal error
    (must be implemented separately). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit alu this is handled by
    second_cmpordinal; otherwise this method raises an internal error
    (must be implemented separately). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1192. {*****************************************************************************
  1193. AddOrdinal
  1194. *****************************************************************************}
  { Generates a CMP for two ordinal operands and leaves the result in the
    cpu flags (location becomes LOC_FLAGS). }
  procedure tx86addnode.second_cmpordinal;
    var
      operanddef : tdef;
      cmpsize : tcgsize;
      is_unsigned,
      const_with_mem : boolean;
    begin
      { the comparison is unsigned as soon as one operand is not signed }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      operanddef:=left.resultdef;
      cmpsize:=def_cgsize(operanddef);

      pass_left_right;

      { a constant can be compared directly against a memory operand }
      const_with_mem:=(right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
        { for 64 bit operands the constant has to fit into a longint }
        and (not(cmpsize in [OS_64,OS_S64]) or
             ((right.location.value>=low(longint)) and
              (right.location.value<=high(longint))))
{$endif x86_64}
        ;

      if const_with_mem then
        emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference)
      else
        begin
          left_must_be_reg(operanddef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: let caddnode refer to the x86 add node class }
  begin
    caddnode:=tx86addnode;
  end.