nx86add.pas 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add node for the x86 family.  The class extends tcgaddnode with helpers
    that bring the operands into a form usable by x86 instructions and
    overrides the second_* code generation entry points. }
  tx86addnode = class(tcgaddnode)
  protected
    { condition flags that represent the current comparison node after an
      integer compare; 'unsigned' selects the unsigned flag set }
    function getresflags(unsigned : boolean) : tresflags;
    { same as getresflags, but yields the F_F* flags used for float compares }
    function getfpuresflags : tresflags;

    { makes sure left.location is a register, swapping the operands when
      'noswap' permits it }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { loads both operands into fpu registers }
    procedure force_left_and_right_fpureg;
    { prepares both operands for an x87 instruction; an operand that may stay
      in memory is returned in 'refnode' }
    procedure prepare_x87_locations(out refnode: tnode);

    { emits "op right.location,left.location.register" }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the instruction for an integer/set operation plus the optional
      overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    function pass_1 : tnode;override;
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
    procedure second_addordinal;override;
    procedure second_addboolean;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  57. implementation
  58. uses
  59. globtype,globals,
  60. verbose,cutils,compinnr,
  61. cpuinfo,
  62. aasmbase,aasmdata,aasmcpu,
  63. symconst,symdef,
  64. cgobj,hlcgobj,cgx86,cga,cgutils,
  65. tgobj,ncgutil,
  66. ncon,nset,ninl,ncnv,
  67. defutil,
  68. htypechk;
  69. { Range check must be disabled explicitly as the code serves
  70. on three different architecture sizes }
  71. {$R-}
  72. {*****************************************************************************
  73. Helpers
  74. *****************************************************************************}
  { Emits the instruction 'op' for an integer or small set operation and,
    if requested, the code that traps on overflow.

    Precondition: left.location.loc is LOC_REGISTER (see left_must_be_reg).
    The callers in this unit read the result from left.location afterwards.

    Parameters:
      op         - instruction to emit (A_ADD, A_SUB, A_CMP, A_IMUL, A_AND, ...)
      opsize     - operand size used for all emitted instructions
      unsigned   - selects the overflow test: the trap is skipped on F_AE
                   (no carry) when true and on F_NO (no signed overflow)
                   otherwise
      extra_not  - one operand is complemented with NOT before 'op' is
                   applied; which one depends on nf_swapped
      mboverflow - the operation may overflow; the call to FPC_OVERFLOW is
                   only generated when needoverflowcheck is true as well

    The INC, DEC, SHL and LEA shortcuts are only taken when no overflow check
    is needed: INC/DEC leave the carry flag untouched and SHL/LEA do not
    report a multiplication overflow, so the flag test emitted at the end
    would read stale or meaningless flags. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
      overflowcheck: boolean;
    begin
      overflowcheck:=needoverflowcheck;
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for commutative operations the operands are exchanged before
                the instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute "right op left" in a scratch register and move the
                result back into left.location.register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else if (op=A_ADD) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not overflowcheck and
                 UseIncDec then
                begin
                  emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_SUB) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 { DEC does not update the carry flag, so it must not be used
                   when the overflow test below is generated }
                 not overflowcheck and
                 UseIncDec then
                begin
                  emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (ispowerof2(int64(right.location.value),power)) and
                 { the flags of SHL do not tell whether the multiplication
                   overflowed, so keep IMUL when checking is enabled }
                 not overflowcheck then
                begin
                  emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not overflowcheck then
                begin
                  { multiplication by 3, 5 or 9: lea (reg,reg,2/4/8),newreg }
                  reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { load right into a scratch register, complement it
                        and AND it into the left register }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if overflowcheck then
            begin
              current_asmdata.getjumplabel(hl4);
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Ensures that left.location is a register.

    When 'noswap' is false and the right operand already lives in a
    (constant) register, the two locations are exchanged and nf_swapped is
    toggled instead of loading left.  Afterwards every non-constant operand
    whose size differs from 'opsize' (ignoring signedness) is forced into a
    register of type 'opdef'. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_compare,
      right_was_creg : boolean;
    begin
      { maybe we can reuse a constant register when the operation is a
        comparison that doesn't change the value of the register }
      is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or
             not(right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            { no swap possible: load left into a register }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare)
          else
            begin
              { right is a register, so the locations can be exchanged }
              right_was_creg:=right.location.loc=LOC_CREGISTER;
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if right_was_creg then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
                  location:=left.location;
                end;
            end;
        end;

      { adjust operands that do not have the operation size yet }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into fpu registers.  The right operand is loaded
    first if necessary, then the left one.  If left was already in an fpu
    register, nf_swapped is toggled to record the operand order on the
    fpu stack. }
  procedure tx86addnode.force_left_and_right_fpureg;
    var
      left_on_stack : boolean;
    begin
      left_on_stack:=left.location.loc=LOC_FPUREGISTER;

      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left_on_stack then
        { left was on the stack already, so the operands are in the wrong
          order on the stack => swap }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Prepares both operands for an x87 instruction:
    an operand that is an OS_F32/OS_F64 sized LOC_(C)REFERENCE is handed back
    in 'refnode' so it can be used as memory operand, everything else is
    loaded onto the fpu stack.  'refnode' is nil when both operands ended up
    on the stack.  nf_swapped is toggled to track the operand order. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_stack : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to
        memory here; below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { count the operands that already are on the fpu stack }
      on_stack:=0;
      if left.location.loc=LOC_FPUREGISTER then
        inc(on_stack);
      if right.location.loc=LOC_FPUREGISTER then
        inc(on_stack);

      case on_stack of
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        1:
          begin
            { the operand which is not on the stack is the candidate for
              the memory operand }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        0:
          begin
            { right always goes onto the stack, left may stay in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if left.location.size in [OS_F32,OS_F64] then
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end
            else
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          end;
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right.location,left.location.register".

    left.location must be a register.  A right operand located in a subset
    register/reference is loaded into a register first.  Right locations
    other than register, reference or constant raise internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly, bigger
              values have to pass through a register }
            if (opsize in [OS_64,OS_S64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flags that represent the comparison of this node
    after an integer compare.  'unsigned' selects the above/below flags
    instead of the greater/less flags.  When nf_swapped is set, the mirrored
    condition is returned.  Node types other than the six comparison
    operators raise an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      if nodetype=equaln then
        getresflags:=F_E
      else if nodetype=unequaln then
        getresflags:=F_NE
      else if unsigned then
        begin
          if swapped then
            case nodetype of
              ltn : getresflags:=F_A;
              lten : getresflags:=F_AE;
              gtn : getresflags:=F_B;
              gten : getresflags:=F_BE;
              else
                internalerror(2013120107);
            end
          else
            case nodetype of
              ltn : getresflags:=F_B;
              lten : getresflags:=F_BE;
              gtn : getresflags:=F_A;
              gten : getresflags:=F_AE;
              else
                internalerror(2013120108);
            end;
        end
      else
        begin
          if swapped then
            case nodetype of
              ltn : getresflags:=F_G;
              lten : getresflags:=F_GE;
              gtn : getresflags:=F_L;
              gten : getresflags:=F_LE;
              else
                internalerror(2013120105);
            end
          else
            case nodetype of
              ltn : getresflags:=F_L;
              lten : getresflags:=F_LE;
              gtn : getresflags:=F_G;
              gten : getresflags:=F_GE;
              else
                internalerror(2013120106);
            end;
        end;
    end;
  { Returns the F_F* condition flags that represent the comparison of this
    node.  When nf_swapped is set, the mirrored condition is returned.  Node
    types other than the six comparison operators raise an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          if not(nf_swapped in flags) then
            case nodetype of
              ltn : result:=F_FB;
              lten : result:=F_FBE;
              gtn : result:=F_FA;
              gten : result:=F_FAE;
              else
                internalerror(2014031403);
            end
          else
            case nodetype of
              ltn : result:=F_FA;
              lten : result:=F_FAE;
              gtn : result:=F_FB;
              gten : result:=F_FBE;
              else
                internalerror(2014031402);
            end;
      end;
    end;
  436. {*****************************************************************************
  437. AddSmallSet
  438. *****************************************************************************}
  439. {$ifndef i8086}
  { Generates code for operations on small sets: union and inclusion of a
    single element (addn), symmetric difference, intersection, difference
    (subn) and the plain xor/or/and operators.  The result is stored in
    'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap : boolean;
      all_member_optimization:boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        setbase:=tsetdef(right.resultdef).setbase
      else
        setbase:=tsetdef(left.resultdef).setbase;

      { select the instruction }
      case nodetype of
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        subn :
          begin
            { a-b is calculated as "a and not(b)" }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                { a is in right.location, b in left.location }
                all_member_optimization:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                all_member_optimization:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if all_member_optimization then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      { can we use the BMI1 instruction andn? }
      else if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
        (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
        begin
          location_reset(location,LOC_REGISTER,left.location.size);
          location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
          if nf_swapped in flags then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
          if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
            hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
            LOC_REGISTER,LOC_CREGISTER:
              emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
            else
              Internalerror(2018040201);
          end;
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);

          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  568. {$endif not i8086}
  { Generates code for comparisons of small sets.  (Un)equality is a plain
    CMP.  For lten/gten the operands are ANDed first and the result is
    compared for equality.  The result is returned as LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          { nothing to prepare, the compare below is sufficient }
          ;
        lten,gten:
          begin
            { swap for an unswapped lten or a swapped gten }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      { every supported node type ends in a compare }
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  604. {*****************************************************************************
  605. AddMMX
  606. *****************************************************************************}
  607. {$ifdef SUPPORT_MMX}
  { Generates code for an operation on MMX vectors.

    The instruction is chosen from the node type, the element type of the
    left operand (mmx_type) and, for addn/subn, from the cs_mmx_saturation
    local switch.  Combinations without a matching instruction leave op at
    A_NOP and raise internalerror 201408201.  Unsupported node types raise
    internalerror 2003042214.

    The result register is stored in location.register (LOC_MMXREGISTER). }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                  else
                    ;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
              else
                ;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word variant, like
                      A_PADDSW in the addn branch }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                  else
                    ;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      if op = A_NOP then
        internalerror(201408201);

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a new register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { the operand loaded below is right; left is an mmx
                    register here, so checking left would always fail }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { cmpop is never set in this routine, so left is not freed here }
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  789. {$endif SUPPORT_MMX}
  790. {*****************************************************************************
  791. AddFloat
  792. *****************************************************************************}
  793. procedure tx86addnode.second_addfloatsse;
  794. var
  795. op : topcg;
  796. sqr_sum : boolean;
  797. tmp : tnode;
  798. begin
  799. sqr_sum:=false;
  800. if (current_settings.fputype>=fpu_sse3) and
  801. use_vectorfpu(resultdef) and
  802. (nodetype in [addn,subn]) and
  803. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  804. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  805. begin
  806. sqr_sum:=true;
  807. tmp:=tinlinenode(left).left;
  808. tinlinenode(left).left:=nil;
  809. left.free;
  810. left:=tmp;
  811. tmp:=tinlinenode(right).left;
  812. tinlinenode(right).left:=nil;
  813. right.free;
  814. right:=tmp;
  815. end;
  816. pass_left_right;
  817. { fpu operands are always in reversed order on the stack }
  818. if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
  819. toggleflag(nf_swapped);
  820. if (nf_swapped in flags) then
  821. { can't use swapleftright if both are on the fpu stack, since then }
  822. { both are "R_ST" -> nothing would change -> manually switch }
  823. if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
  824. (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
  825. emit_none(A_FXCH,S_NO)
  826. else
  827. swapleftright;
  828. case nodetype of
  829. addn :
  830. op:=OP_ADD;
  831. muln :
  832. op:=OP_MUL;
  833. subn :
  834. op:=OP_SUB;
  835. slashn :
  836. op:=OP_DIV;
  837. else
  838. internalerror(200312231);
  839. end;
  840. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  841. if sqr_sum then
  842. begin
  843. if nf_swapped in flags then
  844. swapleftright;
  845. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  846. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  847. location:=left.location;
  848. if is_double(resultdef) then
  849. begin
  850. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  851. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  852. case nodetype of
  853. addn:
  854. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  855. subn:
  856. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  857. else
  858. internalerror(201108162);
  859. end;
  860. end
  861. else
  862. begin
  863. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  864. { ensure that bits 64..127 contain valid values }
  865. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  866. { the data is now in bits 0..32 and 64..95 }
  867. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  868. case nodetype of
  869. addn:
  870. begin
  871. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  872. end;
  873. subn:
  874. begin
  875. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  876. end;
  877. else
  878. internalerror(201108163);
  879. end;
  880. end
  881. end
  882. { we can use only right as left operand if the operation is commutative }
  883. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  884. begin
  885. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  886. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
  887. { force floating point reg. location to be written to memory,
  888. we don't force it to mm register because writing to memory
  889. allows probably shorter code because there is no direct fpu->mm register
  890. copy instruction
  891. }
  892. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  893. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  894. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  895. end
  896. else
  897. begin
  898. if nf_swapped in flags then
  899. swapleftright;
  900. { force floating point reg. location to be written to memory,
  901. we don't force it to mm register because writing to memory
  902. allows probably shorter code because there is no direct fpu->mm register
  903. copy instruction
  904. }
  905. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  906. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  907. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  908. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  909. { force floating point reg. location to be written to memory,
  910. we don't force it to mm register because writing to memory
  911. allows probably shorter code because there is no direct fpu->mm register
  912. copy instruction
  913. }
  914. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  915. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  916. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  917. end;
  918. end;
  919. procedure tx86addnode.second_addfloatavx;
  920. var
  921. op : topcg;
  922. sqr_sum : boolean;
  923. {$ifdef dummy}
  924. tmp : tnode;
  925. {$endif dummy}
  926. begin
  927. sqr_sum:=false;
  928. {$ifdef dummy}
  929. if (current_settings.fputype>=fpu_sse3) and
  930. use_vectorfpu(resultdef) and
  931. (nodetype in [addn,subn]) and
  932. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  933. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  934. begin
  935. sqr_sum:=true;
  936. tmp:=tinlinenode(left).left;
  937. tinlinenode(left).left:=nil;
  938. left.free;
  939. left:=tmp;
  940. tmp:=tinlinenode(right).left;
  941. tinlinenode(right).left:=nil;
  942. right.free;
  943. right:=tmp;
  944. end;
  945. {$endif dummy}
  946. pass_left_right;
  947. { fpu operands are always in reversed order on the stack }
  948. if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
  949. toggleflag(nf_swapped);
  950. if (nf_swapped in flags) then
  951. { can't use swapleftright if both are on the fpu stack, since then }
  952. { both are "R_ST" -> nothing would change -> manually switch }
  953. if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
  954. (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
  955. emit_none(A_FXCH,S_NO)
  956. else
  957. swapleftright;
  958. case nodetype of
  959. addn :
  960. op:=OP_ADD;
  961. muln :
  962. op:=OP_MUL;
  963. subn :
  964. op:=OP_SUB;
  965. slashn :
  966. op:=OP_DIV;
  967. else
  968. internalerror(200312231);
  969. end;
  970. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  971. if sqr_sum then
  972. begin
  973. if nf_swapped in flags then
  974. swapleftright;
  975. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  976. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  977. location:=left.location;
  978. if is_double(resultdef) then
  979. begin
  980. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  981. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  982. case nodetype of
  983. addn:
  984. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  985. subn:
  986. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  987. else
  988. internalerror(201108162);
  989. end;
  990. end
  991. else
  992. begin
  993. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  994. { ensure that bits 64..127 contain valid values }
  995. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  996. { the data is now in bits 0..32 and 64..95 }
  997. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  998. case nodetype of
  999. addn:
  1000. begin
  1001. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  1002. end;
  1003. subn:
  1004. begin
  1005. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  1006. end;
  1007. else
  1008. internalerror(201108163);
  1009. end;
  1010. end
  1011. end
  1012. { left*2 ? }
  1013. else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
  1014. begin
  1015. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1016. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1017. cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
  1018. left.location.register,
  1019. left.location.register,
  1020. location.register,
  1021. mms_movescalar);
  1022. end
  1023. { right*2 ? }
  1024. else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
  1025. begin
  1026. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
  1027. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  1028. cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
  1029. right.location.register,
  1030. right.location.register,
  1031. location.register,
  1032. mms_movescalar);
  1033. end
  1034. { we can use only right as left operand if the operation is commutative }
  1035. else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (op in [OP_ADD,OP_MUL]) then
  1036. begin
  1037. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1038. { force floating point reg. location to be written to memory,
  1039. we don't force it to mm register because writing to memory
  1040. allows probably shorter code because there is no direct fpu->mm register
  1041. copy instruction
  1042. }
  1043. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1044. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  1045. cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  1046. left.location,
  1047. right.location.register,
  1048. location.register,
  1049. mms_movescalar);
  1050. end
  1051. else
  1052. begin
  1053. if (nf_swapped in flags) then
  1054. swapleftright;
  1055. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1056. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1057. { force floating point reg. location to be written to memory,
  1058. we don't force it to mm register because writing to memory
  1059. allows probably shorter code because there is no direct fpu->mm register
  1060. copy instruction
  1061. }
  1062. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1063. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  1064. cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  1065. right.location,
  1066. left.location.register,
  1067. location.register,
  1068. mms_movescalar);
  1069. end;
  1070. end;
  1071. function tx86addnode.pass_1: tnode;
  1072. begin
  1073. { on x86, we do not support fpu registers, so in case of operations using the x87, it
  1074. is normally useful, not to put the operands into registers which would be mm register }
  1075. if ((left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef)) and
  1076. (not(use_vectorfpu(left.resultdef)) and not(use_vectorfpu(right.resultdef)) and
  1077. not(use_vectorfpu(resultdef))) then
  1078. begin
  1079. make_not_regable(left,[ra_addr_regable]);
  1080. make_not_regable(right,[ra_addr_regable]);
  1081. end;
  1082. Result:=inherited pass_1;
  1083. end;
  1084. function tx86addnode.use_fma : boolean;
  1085. begin
  1086. {$ifndef i8086}
  1087. { test if the result stays in an xmm register, fiddeling with fpu registers and fma makes no sense }
  1088. Result:=use_vectorfpu(resultdef) and
  1089. ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]);
  1090. {$else i8086}
  1091. Result:=inherited use_fma;
  1092. {$endif i8086}
  1093. end;
  1094. procedure tx86addnode.second_cmpfloatvector;
  1095. var
  1096. op : tasmop;
  1097. const
  1098. ops_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
  1099. ops_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
  1100. begin
  1101. if is_single(left.resultdef) then
  1102. op:=ops_single[UseAVX]
  1103. else if is_double(left.resultdef) then
  1104. op:=ops_double[UseAVX]
  1105. else
  1106. internalerror(200402222);
  1107. pass_left_right;
  1108. { fpu operands are always in reversed order on the stack }
  1109. if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
  1110. toggleflag(nf_swapped);
  1111. location_reset(location,LOC_FLAGS,OS_NO);
  1112. { Direct move fpu->mm register is not possible, so force any fpu operands to
  1113. memory (not to mm registers because one of the memory locations can be used
  1114. directly in compare instruction, yielding shorter code) }
  1115. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1116. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  1117. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1118. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  1119. if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1120. begin
  1121. case left.location.loc of
  1122. LOC_REFERENCE,LOC_CREFERENCE:
  1123. begin
  1124. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  1125. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
  1126. end;
  1127. LOC_MMREGISTER,LOC_CMMREGISTER:
  1128. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
  1129. else
  1130. internalerror(200402221);
  1131. end;
  1132. toggleflag(nf_swapped);
  1133. end
  1134. else
  1135. begin
  1136. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1137. case right.location.loc of
  1138. LOC_REFERENCE,LOC_CREFERENCE:
  1139. begin
  1140. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  1141. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
  1142. end;
  1143. LOC_MMREGISTER,LOC_CMMREGISTER:
  1144. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
  1145. else
  1146. internalerror(200402223);
  1147. end;
  1148. end;
  1149. location.resflags:=getfpuresflags;
  1150. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1151. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1152. end;
  1153. procedure tx86addnode.second_opvector;
  1154. var
  1155. op : topcg;
  1156. begin
  1157. pass_left_right;
  1158. if (nf_swapped in flags) then
  1159. swapleftright;
  1160. case nodetype of
  1161. addn :
  1162. op:=OP_ADD;
  1163. muln :
  1164. op:=OP_MUL;
  1165. subn :
  1166. op:=OP_SUB;
  1167. slashn :
  1168. op:=OP_DIV;
  1169. else
  1170. internalerror(200610071);
  1171. end;
  1172. if fits_in_mm_register(left.resultdef) then
  1173. begin
  1174. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  1175. { we can use only right as left operand if the operation is commutative }
  1176. if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  1177. begin
  1178. location.register:=right.location.register;
  1179. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
  1180. end
  1181. else
  1182. begin
  1183. location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
  1184. location.register:=left.location.register;
  1185. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
  1186. tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
  1187. end;
  1188. end
  1189. else
  1190. begin
  1191. { not yet supported }
  1192. internalerror(200610072);
  1193. end
  1194. end;
  1195. procedure tx86addnode.second_addfloat;
  1196. const
  1197. ops_add: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
  1198. ops_mul: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
  1199. ops_sub: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
  1200. ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
  1201. ops_div: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
  1202. ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
  1203. var
  1204. op : TAsmOp;
  1205. refnode, hp: tnode;
  1206. hasref : boolean;
  1207. begin
  1208. if use_vectorfpu(resultdef) then
  1209. begin
  1210. if UseAVX then
  1211. second_addfloatavx
  1212. else
  1213. second_addfloatsse;
  1214. exit;
  1215. end;
  1216. { can the operation do the conversion? }
  1217. if (left.nodetype=typeconvn) and (is_double(ttypeconvnode(left).left.resultdef) or is_single(ttypeconvnode(left).left.resultdef)) then
  1218. begin
  1219. hp:=left;
  1220. left:=ttypeconvnode(left).left;
  1221. ttypeconvnode(hp).left:=nil;
  1222. hp.Free;
  1223. end;
  1224. if (right.nodetype=typeconvn) and (is_double(ttypeconvnode(right).left.resultdef) or is_single(ttypeconvnode(right).left.resultdef)) then
  1225. begin
  1226. hp:=right;
  1227. right:=ttypeconvnode(right).left;
  1228. ttypeconvnode(hp).left:=nil;
  1229. hp.Free;
  1230. end;
  1231. pass_left_right;
  1232. prepare_x87_locations(refnode);
  1233. hasref:=assigned(refnode);
  1234. case nodetype of
  1235. addn :
  1236. op:=ops_add[hasref];
  1237. muln :
  1238. op:=ops_mul[hasref];
  1239. subn :
  1240. if (nf_swapped in flags) then
  1241. op:=ops_rsub[hasref]
  1242. else
  1243. op:=ops_sub[hasref];
  1244. slashn :
  1245. if (nf_swapped in flags) then
  1246. op:=ops_rdiv[hasref]
  1247. else
  1248. op:=ops_div[hasref];
  1249. else
  1250. internalerror(2003042214);
  1251. end;
  1252. if hasref then
  1253. emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference)
  1254. else
  1255. begin
  1256. emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
  1257. tcgx86(cg).dec_fpu_stack;
  1258. end;
  1259. location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
  1260. location.register:=NR_ST;
  1261. end;
  1262. procedure tx86addnode.second_cmpfloat;
  1263. {$ifdef i8086}
  1264. var
  1265. tmpref: treference;
  1266. {$endif i8086}
  1267. begin
  1268. if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
  1269. begin
  1270. second_cmpfloatvector;
  1271. exit;
  1272. end;
  1273. pass_left_right;
  1274. force_left_and_right_fpureg;
  1275. {$ifndef x86_64}
  1276. if current_settings.cputype<cpu_Pentium2 then
  1277. begin
  1278. emit_none(A_FCOMPP,S_NO);
  1279. tcgx86(cg).dec_fpu_stack;
  1280. tcgx86(cg).dec_fpu_stack;
  1281. { load fpu flags }
  1282. {$ifdef i8086}
  1283. if current_settings.cputype < cpu_286 then
  1284. begin
  1285. tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
  1286. emit_ref(A_FSTSW,S_NO,tmpref);
  1287. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1288. inc(tmpref.offset);
  1289. emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
  1290. dec(tmpref.offset);
  1291. emit_none(A_SAHF,S_NO);
  1292. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1293. tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
  1294. end
  1295. else
  1296. {$endif i8086}
  1297. begin
  1298. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1299. emit_reg(A_FNSTSW,S_NO,NR_AX);
  1300. emit_none(A_SAHF,S_NO);
  1301. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1302. end;
  1303. if cs_fpu_fwait in current_settings.localswitches then
  1304. current_asmdata.CurrAsmList.concat(Taicpu.Op_none(A_FWAIT,S_NO));
  1305. end
  1306. else
  1307. {$endif x86_64}
  1308. begin
  1309. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
  1310. { fcomip pops only one fpu register }
  1311. current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
  1312. tcgx86(cg).dec_fpu_stack;
  1313. tcgx86(cg).dec_fpu_stack;
  1314. end;
  1315. location_reset(location,LOC_FLAGS,OS_NO);
  1316. location.resflags:=getfpuresflags;
  1317. end;
  1318. {*****************************************************************************
  1319. Add64bit
  1320. *****************************************************************************}
  1321. procedure tx86addnode.second_add64bit;
  1322. begin
  1323. {$ifdef cpu64bitalu}
  1324. second_addordinal;
  1325. {$else cpu64bitalu}
  1326. { must be implemented separate }
  1327. internalerror(200402042);
  1328. {$endif cpu64bitalu}
  1329. end;
  1330. procedure tx86addnode.second_cmp64bit;
  1331. begin
  1332. {$ifdef cpu64bitalu}
  1333. second_cmpordinal;
  1334. {$else cpu64bitalu}
  1335. { must be implemented separate }
  1336. internalerror(200402043);
  1337. {$endif cpu64bitalu}
  1338. end;
  1339. {*****************************************************************************
  1340. AddOrdinal
  1341. *****************************************************************************}
  1342. procedure tx86addnode.second_addordinal;
  1343. var
  1344. opsize : tcgsize;
  1345. unsigned : boolean;
  1346. cgop : topcg;
  1347. checkoverflow : Boolean;
  1348. ovloc : tlocation;
  1349. tmpreg : TRegister;
  1350. begin
  1351. { determine if the comparison will be unsigned }
  1352. unsigned:=not(is_signed(left.resultdef)) or
  1353. not(is_signed(right.resultdef));
  1354. { assume no overflow checking is require }
  1355. checkoverflow := false;
  1356. ovloc.loc:=LOC_VOID;
  1357. case nodetype of
  1358. addn:
  1359. begin
  1360. cgop:=OP_ADD;
  1361. checkoverflow:=true;
  1362. end;
  1363. xorn :
  1364. begin
  1365. cgop:=OP_XOR;
  1366. end;
  1367. orn :
  1368. begin
  1369. cgop:=OP_OR;
  1370. end;
  1371. andn:
  1372. begin
  1373. cgop:=OP_AND;
  1374. end;
  1375. muln:
  1376. begin
  1377. checkoverflow:=true;
  1378. if unsigned then
  1379. cgop:=OP_MUL
  1380. else
  1381. cgop:=OP_IMUL;
  1382. end;
  1383. subn :
  1384. begin
  1385. checkoverflow:=true;
  1386. cgop:=OP_SUB;
  1387. end;
  1388. else
  1389. internalerror(2015022501);
  1390. end;
  1391. checkoverflow:=
  1392. checkoverflow and
  1393. needoverflowcheck;
  1394. opsize:=def_cgsize(left.resultdef);
  1395. pass_left_right;
  1396. { do we have to allocate a register? If yes, then three opcode instructions are better, however for sub three op code instructions
  1397. make no sense if right is a reference }
  1398. if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and
  1399. ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE])) and
  1400. { 3 op mul makes only sense if a constant is involed }
  1401. ((nodetype<>muln) or (left.location.loc=LOC_CONSTANT) or (right.location.loc=LOC_CONSTANT)
  1402. {$ifndef i8086}
  1403. or ((CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (not(needoverflowcheck))
  1404. )
  1405. {$endif i8086}
  1406. ) and
  1407. (not(nodetype in [orn,andn,xorn]))) or
  1408. ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
  1409. begin
  1410. { allocate registers }
  1411. force_reg_left_right(false,true);
  1412. set_result_location_reg;
  1413. if nodetype<>subn then
  1414. begin
  1415. if (right.location.loc<>LOC_CONSTANT) then
  1416. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
  1417. left.location.register,right.location.register,
  1418. location.register,checkoverflow,ovloc)
  1419. else
  1420. hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
  1421. right.location.value,left.location.register,
  1422. location.register,checkoverflow,ovloc);
  1423. end
  1424. else { subtract is a special case since its not commutative }
  1425. begin
  1426. if (nf_swapped in flags) then
  1427. swapleftright;
  1428. if left.location.loc<>LOC_CONSTANT then
  1429. begin
  1430. if right.location.loc<>LOC_CONSTANT then
  1431. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1432. right.location.register,left.location.register,
  1433. location.register,checkoverflow,ovloc)
  1434. else
  1435. hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1436. right.location.value,left.location.register,
  1437. location.register,checkoverflow,ovloc);
  1438. end
  1439. else
  1440. begin
  1441. tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
  1442. hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
  1443. left.location.value,tmpreg);
  1444. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1445. right.location.register,tmpreg,location.register,checkoverflow,ovloc);
  1446. end;
  1447. end
  1448. end
  1449. else
  1450. begin
  1451. { at least one location should be a register, if yes, try to re-use it, so we can try two operand opcodes }
  1452. if left.location.loc<>LOC_REGISTER then
  1453. begin
  1454. if right.location.loc<>LOC_REGISTER then
  1455. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false)
  1456. else
  1457. begin
  1458. location_swap(left.location,right.location);
  1459. toggleflag(nf_swapped);
  1460. end;
  1461. end;
  1462. { at this point, left.location.loc should be LOC_REGISTER }
  1463. if right.location.loc=LOC_REGISTER then
  1464. begin
  1465. { when swapped another result register }
  1466. if (nodetype=subn) and (nf_swapped in flags) then
  1467. begin
  1468. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
  1469. left.location.register,right.location.register);
  1470. location_swap(left.location,right.location);
  1471. toggleflag(nf_swapped);
  1472. end
  1473. else
  1474. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
  1475. right.location.register,left.location.register);
  1476. end
  1477. else
  1478. begin
  1479. { right.location<>LOC_REGISTER }
  1480. if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
  1481. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
  1482. if (nodetype=subn) and (nf_swapped in flags) then
  1483. begin
  1484. tmpreg:=left.location.register;
  1485. left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  1486. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
  1487. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
  1488. end
  1489. else
  1490. cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
  1491. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1492. end;
  1493. location_copy(location,left.location);
  1494. end;
  1495. { emit overflow check if required }
  1496. if checkoverflow then
  1497. cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
  1498. end;
  1499. procedure tx86addnode.second_addboolean;
  1500. begin
  1501. if (nodetype in [orn,andn]) and
  1502. (not(cs_full_boolean_eval in current_settings.localswitches) or
  1503. (nf_short_bool in flags)) then
  1504. inherited second_addboolean
  1505. else if is_64bit(left.resultdef) then
  1506. inherited
  1507. else
  1508. second_addordinal;
  1509. end;
  1510. procedure tx86addnode.second_cmpordinal;
  1511. var
  1512. opdef : tdef;
  1513. opsize : tcgsize;
  1514. unsigned : boolean;
  1515. begin
  1516. unsigned:=not(is_signed(left.resultdef)) or
  1517. not(is_signed(right.resultdef));
  1518. opdef:=left.resultdef;
  1519. opsize:=def_cgsize(opdef);
  1520. pass_left_right;
  1521. if (right.location.loc=LOC_CONSTANT) and
  1522. (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
  1523. {$ifdef x86_64}
  1524. and ((not (opsize in [OS_64,OS_S64])) or (
  1525. (right.location.value>=low(longint)) and (right.location.value<=high(longint))
  1526. ))
  1527. {$endif x86_64}
  1528. then
  1529. begin
  1530. emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
  1531. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1532. end
  1533. else
  1534. begin
  1535. left_must_be_reg(opdef,opsize,false);
  1536. emit_generic_code(A_CMP,opsize,unsigned,false,false);
  1537. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1538. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1539. end;
  1540. location_reset(location,LOC_FLAGS,OS_NO);
  1541. location.resflags:=getresflags(unsigned);
  1542. end;
  1543. begin
  1544. caddnode:=tx86addnode;
  1545. end.