nx86add.pas 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
type
  { Add/compare node code generator shared by the x86 targets.
    Extends tcgaddnode with x86 specific helpers and second-pass overrides. }
  tx86addnode = class(tcgaddnode)
  protected
    { Maps the comparison stored in nodetype (and the nf_swapped flag) to the
      integer condition flags; 'unsigned' selects above/below instead of
      greater/less conditions. }
    function getresflags(unsigned : boolean) : tresflags;
    { Same mapping as getresflags, but yields the F_Fxx flag values. }
    function getfpuresflags : tresflags;
    { Ensures left.location is a LOC_REGISTER, swapping left and right when
      allowed ('noswap' = false) and right already lives in a register; also
      forces operands whose size differs from 'opsize' into registers. }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { Loads both operands into fpu registers and updates nf_swapped to
      reflect the resulting operand order. }
    procedure force_left_and_right_fpureg;
    { Prepares the operands for an x87 instruction; an OS_F32/OS_F64 sized
      memory operand is handed back in 'refnode', everything else is loaded
      onto the fpu stack. }
    procedure prepare_x87_locations(out refnode: tnode);
    { Emits 'op right,left' for a right operand that is a register, a
      reference or a constant; left must already be a register. }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { Emits the integer operation 'op' on left/right including several
      constant operand shortcuts and, if 'mboverflow' is set and overflow
      checking is enabled, the overflow check. }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { NOTE(review): implementations of the following three methods are not
      visible in this part of the file. }
    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    { Small set arithmetic; not compiled for i8086. }
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    { Small set comparisons, result is returned in the flags. }
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
    procedure second_addordinal;override;
{$ifdef SUPPORT_MMX}
    { MMX vector arithmetic; only compiled when SUPPORT_MMX is defined. }
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  55. implementation
  56. uses
  57. globtype,globals,
  58. verbose,cutils,compinnr,
  59. cpuinfo,
  60. aasmbase,aasmdata,aasmcpu,
  61. symconst,symdef,
  62. cgobj,hlcgobj,cgx86,cga,cgutils,
  63. tgobj,ncgutil,
  64. ncon,nset,ninl,
  65. defutil;
  66. {*****************************************************************************
  67. Helpers
  68. *****************************************************************************}
  69. procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
  70. var
  71. power : longint;
  72. hl4 : tasmlabel;
  73. r : Tregister;
  74. href : treference;
  75. begin
  76. { at this point, left.location.loc should be LOC_REGISTER }
  77. if right.location.loc=LOC_REGISTER then
  78. begin
  79. { right.location is a LOC_REGISTER }
  80. { when swapped another result register }
  81. if (nodetype=subn) and (nf_swapped in flags) then
  82. begin
  83. if extra_not then
  84. emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
  85. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
  86. { newly swapped also set swapped flag }
  87. location_swap(left.location,right.location);
  88. toggleflag(nf_swapped);
  89. end
  90. else
  91. begin
  92. if extra_not then
  93. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  94. if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
  95. location_swap(left.location,right.location);
  96. emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
  97. end;
  98. end
  99. else
  100. begin
  101. { right.location is not a LOC_REGISTER }
  102. if (nodetype=subn) and (nf_swapped in flags) then
  103. begin
  104. if extra_not then
  105. cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
  106. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  107. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
  108. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
  109. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
  110. end
  111. else
  112. begin
  113. { Optimizations when right.location is a constant value }
  114. if (op=A_CMP) and
  115. (nodetype in [equaln,unequaln]) and
  116. (right.location.loc=LOC_CONSTANT) and
  117. (right.location.value=0) then
  118. begin
  119. { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
  120. spilling, while 'test %reg,%reg' still requires loading into register.
  121. If spilling is not necessary, it is changed back into 'test %reg,%reg' by
  122. peephole optimizer (this optimization is currently available only for i386). }
  123. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  124. {$ifdef i386}
  125. emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
  126. {$else i386}
  127. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  128. {$endif i386}
  129. end
  130. else
  131. if (op=A_ADD) and
  132. (right.location.loc=LOC_CONSTANT) and
  133. (right.location.value=1) and
  134. not(cs_check_overflow in current_settings.localswitches) and
  135. UseIncDec then
  136. begin
  137. emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
  138. end
  139. else
  140. if (op=A_SUB) and
  141. (right.location.loc=LOC_CONSTANT) and
  142. (right.location.value=1) and
  143. not(cs_check_overflow in current_settings.localswitches) and
  144. UseIncDec then
  145. begin
  146. emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
  147. end
  148. else
  149. if (op=A_IMUL) and
  150. (right.location.loc=LOC_CONSTANT) and
  151. (ispowerof2(int64(right.location.value),power)) and
  152. not(cs_check_overflow in current_settings.localswitches) then
  153. begin
  154. emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
  155. end
  156. else if (op=A_IMUL) and
  157. (right.location.loc=LOC_CONSTANT) and
  158. (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
  159. (power in [1..3]) and
  160. not(cs_check_overflow in current_settings.localswitches) then
  161. begin
  162. reference_reset_base(href,left.location.register,0,0,[]);
  163. href.index:=left.location.register;
  164. href.scalefactor:=int64(right.location.value)-1;
  165. left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  166. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
  167. end
  168. else
  169. begin
  170. if extra_not then
  171. begin
  172. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  173. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
  174. emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
  175. emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
  176. end
  177. else
  178. begin
  179. emit_op_right_left(op,opsize);
  180. end;
  181. end;
  182. end;
  183. end;
  184. { only in case of overflow operations }
  185. { produce overflow code }
  186. { we must put it here directly, because sign of operation }
  187. { is in unsigned VAR!! }
  188. if mboverflow then
  189. begin
  190. if cs_check_overflow in current_settings.localswitches then
  191. begin
  192. current_asmdata.getjumplabel(hl4);
  193. if unsigned then
  194. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  195. else
  196. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  197. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  198. cg.a_label(current_asmdata.CurrAsmList,hl4);
  199. end;
  200. end;
  201. end;
  202. procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
  203. begin
  204. { left location is not a register? }
  205. if (left.location.loc<>LOC_REGISTER) then
  206. begin
  207. { if right is register then we can swap the locations }
  208. if (not noswap) and
  209. (right.location.loc=LOC_REGISTER) then
  210. begin
  211. location_swap(left.location,right.location);
  212. toggleflag(nf_swapped);
  213. end
  214. else if (not noswap) and
  215. (right.location.loc=LOC_CREGISTER) then
  216. begin
  217. location_swap(left.location,right.location);
  218. toggleflag(nf_swapped);
  219. { maybe we can reuse a constant register when the
  220. operation is a comparison that doesn't change the
  221. value of the register }
  222. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
  223. location:=left.location;
  224. end
  225. else
  226. begin
  227. { maybe we can reuse a constant register when the
  228. operation is a comparison that doesn't change the
  229. value of the register }
  230. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
  231. end;
  232. end;
  233. if (right.location.loc<>LOC_CONSTANT) and
  234. (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
  235. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
  236. if (left.location.loc<>LOC_CONSTANT) and
  237. (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
  238. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  239. end;
  240. procedure tx86addnode.force_left_and_right_fpureg;
  241. begin
  242. if (right.location.loc<>LOC_FPUREGISTER) then
  243. begin
  244. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
  245. if (left.location.loc<>LOC_FPUREGISTER) then
  246. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
  247. else
  248. { left was on the stack => swap }
  249. toggleflag(nf_swapped);
  250. end
  251. { the nominator in st0 }
  252. else if (left.location.loc<>LOC_FPUREGISTER) then
  253. begin
  254. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
  255. end
  256. else
  257. begin
  258. { fpu operands are always in the wrong order on the stack }
  259. toggleflag(nf_swapped);
  260. end;
  261. end;
  262. { Makes sides suitable for executing an x87 instruction:
  263. if either side is OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in 'refnode'
  264. everything else is loaded to FPU stack. }
  265. procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
  266. begin
  267. refnode:=nil;
  268. { later on, no mm registers are allowed, so transfer everything to memory here
  269. below it is loaded into an fpu register if neede }
  270. if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
  271. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  272. if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
  273. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  274. case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of
  275. 0:
  276. begin
  277. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
  278. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  279. InternalError(2013090803);
  280. if (left.location.size in [OS_F32,OS_F64]) then
  281. begin
  282. refnode:=left;
  283. toggleflag(nf_swapped);
  284. end
  285. else
  286. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  287. end;
  288. 1:
  289. begin { if left is on the stack then swap. }
  290. if (left.location.loc=LOC_FPUREGISTER) then
  291. refnode:=right
  292. else
  293. refnode:=left;
  294. if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  295. InternalError(2013090801);
  296. if not (refnode.location.size in [OS_F32,OS_F64]) then
  297. begin
  298. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
  299. if (refnode=right) then
  300. toggleflag(nf_swapped);
  301. refnode:=nil;
  302. end
  303. else
  304. begin
  305. if (refnode=left) then
  306. toggleflag(nf_swapped);
  307. end;
  308. end;
  309. 2: { fpu operands are always in the wrong order on the stack }
  310. toggleflag(nf_swapped);
  311. else
  312. InternalError(2013090802);
  313. end;
  314. end;
  315. procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
  316. {$ifdef x86_64}
  317. var
  318. tmpreg : tregister;
  319. {$endif x86_64}
  320. begin
  321. if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
  322. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
  323. { left must be a register }
  324. case right.location.loc of
  325. LOC_REGISTER,
  326. LOC_CREGISTER :
  327. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
  328. LOC_REFERENCE,
  329. LOC_CREFERENCE :
  330. begin
  331. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  332. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
  333. end;
  334. LOC_CONSTANT :
  335. begin
  336. {$ifdef x86_64}
  337. { x86_64 only supports signed 32 bits constants directly }
  338. if (opsize in [OS_S64,OS_64]) and
  339. ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
  340. begin
  341. tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  342. cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
  343. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
  344. end
  345. else
  346. {$endif x86_64}
  347. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
  348. end;
  349. else
  350. internalerror(200203232);
  351. end;
  352. end;
  353. function tx86addnode.getresflags(unsigned : boolean) : tresflags;
  354. begin
  355. case nodetype of
  356. equaln : getresflags:=F_E;
  357. unequaln : getresflags:=F_NE;
  358. else
  359. if not(unsigned) then
  360. begin
  361. if nf_swapped in flags then
  362. case nodetype of
  363. ltn : getresflags:=F_G;
  364. lten : getresflags:=F_GE;
  365. gtn : getresflags:=F_L;
  366. gten : getresflags:=F_LE;
  367. else
  368. internalerror(2013120105);
  369. end
  370. else
  371. case nodetype of
  372. ltn : getresflags:=F_L;
  373. lten : getresflags:=F_LE;
  374. gtn : getresflags:=F_G;
  375. gten : getresflags:=F_GE;
  376. else
  377. internalerror(2013120106);
  378. end;
  379. end
  380. else
  381. begin
  382. if nf_swapped in flags then
  383. case nodetype of
  384. ltn : getresflags:=F_A;
  385. lten : getresflags:=F_AE;
  386. gtn : getresflags:=F_B;
  387. gten : getresflags:=F_BE;
  388. else
  389. internalerror(2013120107);
  390. end
  391. else
  392. case nodetype of
  393. ltn : getresflags:=F_B;
  394. lten : getresflags:=F_BE;
  395. gtn : getresflags:=F_A;
  396. gten : getresflags:=F_AE;
  397. else
  398. internalerror(2013120108);
  399. end;
  400. end;
  401. end;
  402. end;
  403. function tx86addnode.getfpuresflags : tresflags;
  404. begin
  405. if (nodetype=equaln) then
  406. result:=F_FE
  407. else if (nodetype=unequaln) then
  408. result:=F_FNE
  409. else if (nf_swapped in flags) then
  410. case nodetype of
  411. ltn : result:=F_FA;
  412. lten : result:=F_FAE;
  413. gtn : result:=F_FB;
  414. gten : result:=F_FBE;
  415. else
  416. internalerror(2014031402);
  417. end
  418. else
  419. case nodetype of
  420. ltn : result:=F_FB;
  421. lten : result:=F_FBE;
  422. gtn : result:=F_FA;
  423. gten : result:=F_FAE;
  424. else
  425. internalerror(2014031403);
  426. end;
  427. end;
  428. {*****************************************************************************
  429. AddSmallSet
  430. *****************************************************************************}
  431. {$ifndef i8086}
  432. procedure tx86addnode.second_addsmallset;
  433. var
  434. setbase : aint;
  435. opdef : tdef;
  436. opsize : TCGSize;
  437. op : TAsmOp;
  438. extra_not,
  439. noswap : boolean;
  440. all_member_optimization:boolean;
  441. begin
  442. pass_left_right;
  443. noswap:=false;
  444. extra_not:=false;
  445. all_member_optimization:=false;
  446. opdef:=resultdef;
  447. opsize:=int_cgsize(opdef.size);
  448. if (left.resultdef.typ=setdef) then
  449. setbase:=tsetdef(left.resultdef).setbase
  450. else
  451. setbase:=tsetdef(right.resultdef).setbase;
  452. case nodetype of
  453. addn :
  454. begin
  455. { adding elements is not commutative }
  456. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  457. swapleftright;
  458. { are we adding set elements ? }
  459. if right.nodetype=setelementn then
  460. begin
  461. { no range support for smallsets! }
  462. if assigned(tsetelementnode(right).right) then
  463. internalerror(43244);
  464. { btsb isn't supported }
  465. if opsize=OS_8 then
  466. begin
  467. opsize:=OS_32;
  468. opdef:=u32inttype;
  469. end;
  470. { bts requires both elements to be registers }
  471. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  472. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
  473. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
  474. op:=A_BTS;
  475. noswap:=true;
  476. end
  477. else
  478. op:=A_OR;
  479. end;
  480. symdifn :
  481. op:=A_XOR;
  482. muln :
  483. op:=A_AND;
  484. subn :
  485. begin
  486. op:=A_AND;
  487. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  488. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  489. all_member_optimization:=true;
  490. if (not(nf_swapped in flags)) and
  491. (right.location.loc=LOC_CONSTANT) then
  492. right.location.value := not(right.location.value)
  493. else if (nf_swapped in flags) and
  494. (left.location.loc=LOC_CONSTANT) then
  495. left.location.value := not(left.location.value)
  496. else
  497. extra_not:=true;
  498. end;
  499. xorn :
  500. op:=A_XOR;
  501. orn :
  502. op:=A_OR;
  503. andn :
  504. op:=A_AND;
  505. else
  506. internalerror(2003042215);
  507. end;
  508. if all_member_optimization then
  509. begin
  510. {A set expression [0..31]-x can be implemented with a simple NOT.}
  511. if nf_swapped in flags then
  512. begin
  513. { newly swapped also set swapped flag }
  514. location_swap(left.location,right.location);
  515. toggleflag(nf_swapped);
  516. end;
  517. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
  518. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  519. location:=right.location;
  520. end
  521. else
  522. begin
  523. { can we use the BMI1 instruction andn? }
  524. if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
  525. (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
  526. begin
  527. location_reset(location,LOC_REGISTER,left.location.size);
  528. location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
  529. if nf_swapped in flags then
  530. begin
  531. location_swap(left.location,right.location);
  532. toggleflag(nf_swapped);
  533. end;
  534. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
  535. if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
  536. hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
  537. case left.location.loc of
  538. LOC_CREGISTER,LOC_REGISTER:
  539. emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
  540. LOC_CREFERENCE,LOC_REFERENCE:
  541. emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
  542. else
  543. Internalerror(2018040201);
  544. end;
  545. end
  546. else
  547. begin
  548. { left must be a register }
  549. left_must_be_reg(opdef,opsize,noswap);
  550. emit_generic_code(op,opsize,true,extra_not,false);
  551. location_freetemp(current_asmdata.CurrAsmList,right.location);
  552. { left is always a register and contains the result }
  553. location:=left.location;
  554. end;
  555. end;
  556. { fix the changed opsize we did above because of the missing btsb }
  557. if opsize<>int_cgsize(resultdef.size) then
  558. hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
  559. end;
  560. {$endif not i8086}
  561. procedure tx86addnode.second_cmpsmallset;
  562. var
  563. opdef : tdef;
  564. opsize : TCGSize;
  565. op : TAsmOp;
  566. begin
  567. pass_left_right;
  568. opdef:=left.resultdef;
  569. opsize:=int_cgsize(opdef.size);
  570. case nodetype of
  571. equaln,
  572. unequaln :
  573. op:=A_CMP;
  574. lten,gten:
  575. begin
  576. if (not(nf_swapped in flags) and (nodetype = lten)) or
  577. ((nf_swapped in flags) and (nodetype = gten)) then
  578. swapleftright;
  579. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  580. emit_op_right_left(A_AND,opsize);
  581. op:=A_CMP;
  582. { warning: ugly hack, we need a JE so change the node to equaln }
  583. nodetype:=equaln;
  584. end;
  585. else
  586. internalerror(2003042215);
  587. end;
  588. { left must be a register }
  589. left_must_be_reg(opdef,opsize,false);
  590. emit_generic_code(op,opsize,true,false,false);
  591. location_freetemp(current_asmdata.CurrAsmList,right.location);
  592. location_freetemp(current_asmdata.CurrAsmList,left.location);
  593. location_reset(location,LOC_FLAGS,OS_NO);
  594. location.resflags:=getresflags(true);
  595. end;
  596. {*****************************************************************************
  597. AddMMX
  598. *****************************************************************************}
  599. {$ifdef SUPPORT_MMX}
  600. procedure tx86addnode.second_opmmx;
  601. var
  602. op : TAsmOp;
  603. cmpop : boolean;
  604. mmxbase : tmmxtype;
  605. hreg,
  606. hregister : tregister;
  607. begin
  608. pass_left_right;
  609. cmpop:=false;
  610. op:=A_NOP;
  611. mmxbase:=mmx_type(left.resultdef);
  612. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  613. case nodetype of
  614. addn :
  615. begin
  616. if (cs_mmx_saturation in current_settings.localswitches) then
  617. begin
  618. case mmxbase of
  619. mmxs8bit:
  620. op:=A_PADDSB;
  621. mmxu8bit:
  622. op:=A_PADDUSB;
  623. mmxs16bit,mmxfixed16:
  624. op:=A_PADDSW;
  625. mmxu16bit:
  626. op:=A_PADDUSW;
  627. end;
  628. end
  629. else
  630. begin
  631. case mmxbase of
  632. mmxs8bit,mmxu8bit:
  633. op:=A_PADDB;
  634. mmxs16bit,mmxu16bit,mmxfixed16:
  635. op:=A_PADDW;
  636. mmxs32bit,mmxu32bit:
  637. op:=A_PADDD;
  638. end;
  639. end;
  640. end;
  641. muln :
  642. begin
  643. case mmxbase of
  644. mmxs16bit,mmxu16bit:
  645. op:=A_PMULLW;
  646. mmxfixed16:
  647. op:=A_PMULHW;
  648. end;
  649. end;
  650. subn :
  651. begin
  652. if (cs_mmx_saturation in current_settings.localswitches) then
  653. begin
  654. case mmxbase of
  655. mmxs8bit:
  656. op:=A_PSUBSB;
  657. mmxu8bit:
  658. op:=A_PSUBUSB;
  659. mmxs16bit,mmxfixed16:
  660. op:=A_PSUBSB;
  661. mmxu16bit:
  662. op:=A_PSUBUSW;
  663. end;
  664. end
  665. else
  666. begin
  667. case mmxbase of
  668. mmxs8bit,mmxu8bit:
  669. op:=A_PSUBB;
  670. mmxs16bit,mmxu16bit,mmxfixed16:
  671. op:=A_PSUBW;
  672. mmxs32bit,mmxu32bit:
  673. op:=A_PSUBD;
  674. end;
  675. end;
  676. end;
  677. xorn:
  678. op:=A_PXOR;
  679. orn:
  680. op:=A_POR;
  681. andn:
  682. op:=A_PAND;
  683. else
  684. internalerror(2003042214);
  685. end;
  686. if op = A_NOP then
  687. internalerror(201408201);
  688. { left and right no register? }
  689. { then one must be demanded }
  690. if (left.location.loc<>LOC_MMXREGISTER) then
  691. begin
  692. if (right.location.loc=LOC_MMXREGISTER) then
  693. begin
  694. location_swap(left.location,right.location);
  695. toggleflag(nf_swapped);
  696. end
  697. else
  698. begin
  699. { register variable ? }
  700. if (left.location.loc=LOC_CMMXREGISTER) then
  701. begin
  702. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  703. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  704. end
  705. else
  706. begin
  707. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  708. internalerror(200203245);
  709. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  710. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  711. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  712. end;
  713. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  714. left.location.register:=hregister;
  715. end;
  716. end;
  717. { at this point, left.location.loc should be LOC_MMXREGISTER }
  718. if right.location.loc<>LOC_MMXREGISTER then
  719. begin
  720. if (nodetype=subn) and (nf_swapped in flags) then
  721. begin
  722. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  723. if right.location.loc=LOC_CMMXREGISTER then
  724. begin
  725. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  726. emit_reg_reg(op,S_NO,left.location.register,hreg);
  727. end
  728. else
  729. begin
  730. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  731. internalerror(200203247);
  732. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  733. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  734. emit_reg_reg(op,S_NO,left.location.register,hreg);
  735. end;
  736. location.register:=hreg;
  737. end
  738. else
  739. begin
  740. if (right.location.loc=LOC_CMMXREGISTER) then
  741. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  742. else
  743. begin
  744. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  745. internalerror(200203246);
  746. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  747. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  748. end;
  749. location.register:=left.location.register;
  750. end;
  751. end
  752. else
  753. begin
  754. { right.location=LOC_MMXREGISTER }
  755. if (nodetype=subn) and (nf_swapped in flags) then
  756. begin
  757. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  758. location_swap(left.location,right.location);
  759. toggleflag(nf_swapped);
  760. end
  761. else
  762. begin
  763. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  764. end;
  765. location.register:=left.location.register;
  766. end;
  767. location_freetemp(current_asmdata.CurrAsmList,right.location);
  768. if cmpop then
  769. location_freetemp(current_asmdata.CurrAsmList,left.location);
  770. end;
  771. {$endif SUPPORT_MMX}
  772. {*****************************************************************************
  773. AddFloat
  774. *****************************************************************************}
  { Generates SSE code for a floating point add/sub/mul/div node.

    Three strategies are used, in this order:
      1. sqr(a)+sqr(b) / sqr(a)-sqr(b) with fputype >= fpu_sse3: both
         arguments are packed into one register, squared with a packed
         multiply and combined with a horizontal add/sub.
      2. right is already in an MM register and the operation is commutative:
         right is copied to a fresh result register and left is applied to it.
      3. otherwise: left is loaded into a fresh result register and right is
         applied to it.
    The result location is always LOC_MMREGISTER. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;
      fuse_squares,
      both_on_fpu : boolean;

    { Replaces a sqr() inline node by its argument. The argument is detached
      (set to nil in the inline node) before the inline node is freed. }
    procedure unwrap_sqr(var operand : tnode);
      var
        inner : tnode;
      begin
        inner:=tinlinenode(operand).left;
        tinlinenode(operand).left:=nil;
        operand.free;
        operand:=inner;
      end;

    { Forces an operand located in an fpu register to memory. It is not
      forced to an mm register because writing to memory probably allows
      shorter code: there is no direct fpu->mm register copy instruction. }
    procedure spill_fpu_operand(operand : tnode);
      begin
        if operand.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,operand.location,operand.resultdef);
      end;

    { Emits the horizontal add (addn) or horizontal sub (subn) of the result
      register with itself; any other node type is an internal error. }
    procedure emit_horizontal(add_opcode,sub_opcode : tasmop;errnum : longint);
      begin
        if nodetype=addn then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(add_opcode,S_NO,location.register,location.register))
        else if nodetype=subn then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sub_opcode,S_NO,location.register,location.register))
        else
          internalerror(errnum);
      end;

    begin
      { sum/difference of two squares? then operate on the arguments of the
        sqr() nodes directly }
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          unwrap_sqr(left);
          unwrap_sqr(right);
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpu:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);
      if both_on_fpu then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", nothing would change, so exchange them manually }
          if both_on_fpu then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register of left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { pack both arguments into the result register, square them ... }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              { ... and combine the two squares }
              emit_horizontal(A_HADDPD,A_HSUBPD,201108162);
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes, then combine the two squares }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              emit_horizontal(A_HADDPS,A_HSUBPS,201108163);
            end;
        end
      { right can only take the role of the left operand if the operation is
        commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          spill_fpu_operand(left);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          spill_fpu_operand(left);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          spill_fpu_operand(right);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates AVX code for a floating point add/sub/mul/div node, using the
    three operand forms of the code generator.

    The sum-of-squares detection is only compiled in when "dummy" is defined;
    without it the corresponding code path is never taken. Multiplication by
    the real constant 2 is turned into an addition of the operand to itself.
    The result location is always LOC_MMREGISTER. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      fuse_squares,
      both_on_fpu : boolean;

{$ifdef dummy}
    { Replaces a sqr() inline node by its argument. The argument is detached
      (set to nil in the inline node) before the inline node is freed. }
    procedure unwrap_sqr(var operand : tnode);
      var
        inner : tnode;
      begin
        inner:=tinlinenode(operand).left;
        tinlinenode(operand).left:=nil;
        operand.free;
        operand:=inner;
      end;
{$endif dummy}

    { True if n is a real constant node holding the value 2. }
    function is_const_two(n : tnode) : boolean;
      begin
        is_const_two:=
          is_constrealnode(n) and
          is_number_float(trealconstnode(n).value_real) and
          (trealconstnode(n).value_real=2);
      end;

    { Forces an operand located in an fpu register to memory. It is not
      forced to an mm register because writing to memory probably allows
      shorter code: there is no direct fpu->mm register copy instruction. }
    procedure spill_fpu_operand(operand : tnode);
      begin
        if operand.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,operand.location,operand.resultdef);
      end;

    { Emits the horizontal add (addn) or horizontal sub (subn) of the result
      register with itself; any other node type is an internal error. }
    procedure emit_horizontal(add_opcode,sub_opcode : tasmop;errnum : longint);
      begin
        if nodetype=addn then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(add_opcode,S_NO,location.register,location.register))
        else if nodetype=subn then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sub_opcode,S_NO,location.register,location.register))
        else
          internalerror(errnum);
      end;

    { Computes operand+operand into a freshly allocated result register. }
    procedure emit_doubling(operand : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,operand.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,operand.location,operand.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          operand.location.register,
          operand.location.register,
          location.register,
          mms_movescalar);
      end;

    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;
          unwrap_sqr(left);
          unwrap_sqr(right);
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpu:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);
      if both_on_fpu then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", nothing would change, so exchange them manually }
          if both_on_fpu then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register of left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              emit_horizontal(A_HADDPD,A_HSUBPD,201108162);
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              emit_horizontal(A_HADDPS,A_HSUBPS,201108163);
            end;
        end
      { left*2 ? }
      else if (nodetype=muln) and is_const_two(right) then
        emit_doubling(left)
      { 2*right ? }
      else if (nodetype=muln) and is_const_two(left) then
        emit_doubling(right)
      { right can only take the role of the left operand if the operation is
        commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_fpu_operand(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_fpu_operand(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Tells whether fused multiply-add instructions may be used for this node.

    On i8086 the inherited answer is returned. On all other x86 targets the
    result must stay in an xmm register (fiddling with fpu registers and fma
    makes no sense) and the selected cpu type must offer FMA or FMA4. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=(cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[];
{$endif i8086}
    end;
  { Generates a scalar SSE/AVX floating point comparison ((V)COMISS/(V)COMISD)
    and leaves the result in the cpu flags (LOC_FLAGS).

    If right is already in an MM register, left is compared against it and
    nf_swapped is toggled; otherwise left is forced into an MM register and
    right is compared against it. Operand types other than single and double
    are an internal error. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { first index: operand is a double, second index: UseAVX }
      cmp_opcodes : array[boolean,boolean] of tasmop = (
        (A_COMISS,A_VCOMISS),
        (A_COMISD,A_VCOMISD)
      );
    var
      opcode : tasmop;

    { Emits "opcode source,target": source may be a memory reference or an MM
      register, target is used as register. Any other source location raises
      the internal error errnum. }
    procedure emit_compare(source,target : tnode;errnum : longint);
      begin
        case source.location.loc of
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(opcode,S_NO,source.location.register,target.location.register));
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,source.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(opcode,S_NO,source.location.reference,target.location.register));
            end;
          else
            internalerror(errnum);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        opcode:=cmp_opcodes[false,UseAVX]
      else if is_double(left.resultdef) then
        opcode:=cmp_opcodes[true,UseAVX]
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { A direct fpu->mm register move is not possible, so fpu operands are
        forced to memory (not to mm registers: one memory location can be used
        directly by the compare instruction, which yields shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is the register operand, so the comparison is reversed }
          emit_compare(left,right,200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right,left,200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for an element-wise add/sub/mul/div of two vectors that
    fit into an MM register; the result location is LOC_MMREGISTER.

    For commutative operations with right already in an MM register, that
    register is reused as result register; otherwise left is forced into an
    MM register which then receives the result. Vectors which do not fit into
    an MM register are not supported yet (internal error 200610072), other
    node types raise internal error 200610071. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operands are arrays, so the size of the operation must be
            derived from the element type in BOTH branches below. Casting the
            array def itself to tfloatdef (as the commutative branch used to
            do) reads the floattype field of an object that is not a float def. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div node.

    If the result type is handled by the vector fpu, the work is delegated to
    second_addfloatavx or second_addfloatsse. Otherwise x87 code is emitted:
    either an instruction with a memory operand (when prepare_x87_locations
    returns a node to reference) or the popping register form on ST/ST(1).
    The x87 result is in NR_ST (LOC_FPUREGISTER). }
  procedure tx86addnode.second_addfloat;
    const
      { last index: false = popping register form, true = memory operand form }
      x87_add : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      x87_mul : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      { first index: nf_swapped set -> reversed variant of the instruction }
      x87_sub : array[boolean,boolean] of TAsmOp = (
        (A_FSUBP,A_FSUB),
        (A_FSUBRP,A_FSUBR)
      );
      x87_div : array[boolean,boolean] of TAsmOp = (
        (A_FDIVP,A_FDIV),
        (A_FDIVRP,A_FDIVR)
      );
    var
      opcode : TAsmOp;
      memnode : tnode;
      use_mem : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memnode);
      use_mem:=assigned(memnode);

      case nodetype of
        addn :
          opcode:=x87_add[use_mem];
        muln :
          opcode:=x87_mul[use_mem];
        subn :
          opcode:=x87_sub[nf_swapped in flags,use_mem];
        slashn :
          opcode:=x87_div[nf_swapped in flags,use_mem];
        else
          internalerror(2003042214);
      end;

      if not use_mem then
        begin
          emit_reg_reg(opcode,S_NO,NR_ST,NR_ST1);
          { the popping form removed one entry from the fpu stack }
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(opcode,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison; the result is left in
    the cpu flags (LOC_FLAGS).

    Operands handled by the vector fpu are delegated to
    second_cmpfloatvector. Otherwise both operands are forced onto the x87
    stack and compared either with FCOMPP plus a transfer of the fpu status
    word into the flags (non-x86_64 targets with a cpu type below
    cpu_Pentium2) or with FCOMIP. }
  procedure tx86addnode.second_cmpfloat;

{$ifndef x86_64}
    { FCOMPP pops both operands; afterwards the fpu condition codes are
      copied into the cpu flags with SAHF. }
    procedure compare_via_status_word;
{$ifdef i8086}
      var
        statusref : treference;
{$endif i8086}
      begin
        emit_none(A_FCOMPP,S_NO);
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
        { load fpu flags }
{$ifdef i8086}
        if current_settings.cputype < cpu_286 then
          begin
            { store the status word to a temp and load its upper byte into AH }
            tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusref);
            emit_ref(A_FSTSW,S_NO,statusref);
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            inc(statusref.offset);
            emit_ref_reg(A_MOV,S_B,statusref,NR_AH);
            dec(statusref.offset);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            tg.ungettemp(current_asmdata.CurrAsmList,statusref);
          end
        else
{$endif i8086}
          begin
            { store the status word directly to AX }
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            emit_reg(A_FNSTSW,S_NO,NR_AX);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          end;
      end;
{$endif x86_64}

    { FCOMIP sets the cpu flags directly but pops only one fpu register, so
      the remaining operand is removed with FSTP. }
    procedure compare_via_fcomip;
      begin
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
      end;

    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        compare_via_status_word
      else
{$endif x86_64}
        compare_via_fcomip;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1267. {*****************************************************************************
  1268. Add64bit
  1269. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit alu (cpu64bitalu) this is ordinary
    ordinal arithmetic; without one, this generic x86 implementation raises
    an internal error because it must be implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit alu (cpu64bitalu) this is an ordinary
    ordinal comparison; without one, this generic x86 implementation raises
    an internal error because it must be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1288. {*****************************************************************************
  1289. AddOrdinal
  1290. *****************************************************************************}
  { Generates code for an ordinal add/sub/mul/and/or/xor node.

    If neither operand is in a LOC_REGISTER (or, for additions, if both are
    registers/constants), a new result register is allocated and three
    operand code generator calls are used. Otherwise the register that
    already holds one operand is reused as destination of a two operand
    operation. Overflow checking is requested for add/sub/mul on non-pointer
    operands when cs_check_overflow is active. Other node types raise
    internal error 2015022501. }
  procedure tx86addnode.second_addordinal;
    var
      opsize : tcgsize;
      is_unsigned,
      want_ovcheck,
      reversed_sub : boolean;
      cgop : topcg;
      ovloc : tlocation;
      scratch : TRegister;
    begin
      { the operation is unsigned as soon as one operand is unsigned }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      ovloc.loc:=LOC_VOID;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          if is_unsigned then
            cgop:=OP_MUL
          else
            cgop:=OP_IMUL;
        andn :
          cgop:=OP_AND;
        orn :
          cgop:=OP_OR;
        xorn :
          cgop:=OP_XOR;
        else
          internalerror(2015022501);
      end;

      { only add, sub and mul can overflow; pointer arithmetic is never checked }
      want_ovcheck:=
        (nodetype in [addn,subn,muln]) and
        (left.resultdef.typ<>pointerdef) and
        (right.resultdef.typ<>pointerdef) and
        (cs_check_overflow in current_settings.localswitches);

      opsize:=def_cgsize(left.resultdef);

      pass_left_right;

      { does a register have to be allocated anyway? If yes, three operand
        operations are better }
      if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER)) or
         ((nodetype=addn) and
          (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and
          (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
        begin
          { allocate registers }
          force_reg_left_right(false,true);
          set_result_location_reg;

          if nodetype=subn then
            begin
              { subtraction is special because it is not commutative }
              if nf_swapped in flags then
                swapleftright;

              if left.location.loc=LOC_CONSTANT then
                begin
                  { constant minuend: load it into a register first }
                  scratch:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,scratch);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,scratch,location.register,want_ovcheck,ovloc);
                end
              else if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.value,left.location.register,
                  location.register,want_ovcheck,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.register,left.location.register,
                  location.register,want_ovcheck,ovloc);
            end
          else if right.location.loc=LOC_CONSTANT then
            hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
              right.location.value,left.location.register,
              location.register,want_ovcheck,ovloc)
          else
            hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
              left.location.register,right.location.register,
              location.register,want_ovcheck,ovloc);
        end
      else
        begin
          { at least one location is a register: re-use it, so two operand
            opcodes can be tried }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc<>LOC_REGISTER then
                { neither operand in a register is not expected on this path }
                Internalerror(2018031102)
              else
                begin
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end;
            end;

          { at this point, left.location.loc should be LOC_REGISTER }
          reversed_sub:=(nodetype=subn) and (nf_swapped in flags);

          if right.location.loc=LOC_REGISTER then
            begin
              if reversed_sub then
                begin
                  { when swapped, the other register receives the result }
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right is not in a LOC_REGISTER; subset locations are loaded
                into a register first }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

              if reversed_sub then
                begin
                  { right is the minuend: load it into a new register and
                    subtract the old left register from it }
                  scratch:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,scratch,left.location.register);
                end
              else
                cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);

              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if want_ovcheck then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
    end;
  { Generates code for an ordinal comparison; the result is left in the cpu
    flags (LOC_FLAGS).

    A constant on the right compared with a memory operand on the left is
    emitted as a single CMP with an immediate (on x86_64 only if a 64 bit
    constant lies within the longint range). All other combinations go
    through left_must_be_reg and emit_generic_code. }
  procedure tx86addnode.second_cmpordinal;
    var
      opdef : tdef;
      opsize : tcgsize;
      is_unsigned,
      imm_with_mem : boolean;
    begin
      { the comparison is unsigned as soon as one operand is unsigned }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      opdef:=left.resultdef;
      opsize:=def_cgsize(opdef);

      pass_left_right;

      imm_with_mem:=
        (right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
{$ifdef x86_64}
      { a 64 bit compare is only emitted with an immediate that is within
        the longint range }
      if imm_with_mem and (opsize in [OS_64,OS_S64]) then
        imm_with_mem:=
          (right.location.value>=low(longint)) and
          (right.location.value<=high(longint));
{$endif x86_64}

      if imm_with_mem then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(opdef,opsize,false);
          emit_generic_code(A_CMP,opsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { Unit initialization: assigns the x86 add node class to caddnode.
    NOTE(review): presumably caddnode is the class reference used elsewhere
    to create add nodes, so that this class gets instantiated - confirm. }
  begin
    caddnode:=tx86addnode;
  end.