nx86add.pas 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Code generator node for binary add-like operations on the i386 and x86
      targets; extends the generic tcgaddnode. }
    tx86addnode = class(tcgaddnode)
    protected
      { condition flags that represent the node's integer comparison,
        taking nf_swapped and signedness into account }
      function getresflags(unsigned : boolean) : tresflags;
      { condition flags that represent the node's floating point comparison,
        taking nf_swapped into account }
      function getfpuresflags : tresflags;
      { makes sure left.location is a LOC_REGISTER, swapping the operand
        locations when that is allowed and cheaper }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      { loads both operands onto the x87 stack }
      procedure force_left_and_right_fpureg;
      { loads the operands for an x87 instruction; an operand that can be
        used directly from memory is returned in refnode }
      procedure prepare_x87_locations(out refnode: tnode);
      { emits "op right,left" where left.location is a register }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { emits the integer operation including the optional overflow check }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      { overrides of the code generation hooks inherited from the add node }
      function use_fma : boolean;override;
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
      procedure second_addordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  55. implementation
  56. uses
  57. globtype,globals,
  58. verbose,cutils,compinnr,
  59. cpuinfo,
  60. aasmbase,aasmdata,aasmcpu,
  61. symconst,symdef,
  62. cgobj,hlcgobj,cgx86,cga,cgutils,
  63. tgobj,ncgutil,
  64. ncon,nset,ninl,
  65. defutil;
  66. {*****************************************************************************
  67. Helpers
  68. *****************************************************************************}
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    { Emits the instruction(s) for "left op right".

      op         - x86 opcode to emit
      opsize     - operand size of the operation
      unsigned   - selects the flag tested by the overflow check
      extra_not  - the operand that is combined into the other one is
                   complemented first (used for set difference)
      mboverflow - the operation may overflow; if overflow checking is
                   enabled a call to FPC_OVERFLOW is generated

      On entry left.location.loc is expected to be LOC_REGISTER; on exit
      left.location.register holds the result (if the opcode has one). }
    var
      shift       : longint;
      ok_label    : tasmlabel;
      tmp         : Tregister;
      lea_ref     : treference;
      swapped_sub,
      commutative,
      right_const,
      ovf_checks  : boolean;
    begin
      { none of the following depends on code emitted below, so evaluate them once }
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      commutative:=(op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL);
      right_const:=right.location.loc=LOC_CONSTANT;
      ovf_checks:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { operands of the subtraction are exchanged: compute into the
                right register and make it the new left location }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              if commutative then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      { from here on right.location is not a LOC_REGISTER }
      else if swapped_sub then
        begin
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          { compute in a scratch register, then move the result back to left }
          tmp:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,tmp);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,tmp);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,tmp,left.location.register);
        end
      { optimizations when right.location is a constant value }
      else if right_const and (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.value=0) then
        begin
          { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
            spilling, while 'test %reg,%reg' still requires loading into register.
            If spilling is not necessary, it is changed back into 'test %reg,%reg' by
            peephole optimizer (this optimization is currently available only for i386). }
{$ifdef i386}
          emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
          emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
        end
      else if right_const and (op=A_ADD) and
              (right.location.value=1) and
              not(ovf_checks) and
              UseIncDec then
        { x+1 -> inc }
        emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
      else if right_const and (op=A_SUB) and
              (right.location.value=1) and
              not(ovf_checks) and
              UseIncDec then
        { x-1 -> dec }
        emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
      else if right_const and (op=A_IMUL) and
              ispowerof2(int64(right.location.value),shift) and
              not(ovf_checks) then
        { x*2^n -> shl n }
        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shift,left.location.register)
      else if right_const and (op=A_IMUL) and
              (right.location.value>1) and
              ispowerof2(int64(right.location.value)-1,shift) and
              (shift in [1..3]) and
              not(ovf_checks) then
        begin
          { x*(2^n+1) with n in 1..3 -> lea (x,x,2^n) into a fresh register }
          reference_reset_base(lea_ref,left.location.register,0,0,[]);
          lea_ref.index:=left.location.register;
          lea_ref.scalefactor:=int64(right.location.value)-1;
          left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],lea_ref,left.location.register));
        end
      else if extra_not then
        begin
          { load right into a scratch register so it can be complemented
            without modifying the original operand }
          tmp:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,tmp);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],tmp);
          emit_reg_reg(A_AND,TCGSize2Opsize[opsize],tmp,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { only in case of overflow operations produce overflow code;
        it has to be generated right here because the signedness of the
        operation is only known through the 'unsigned' parameter }
      if mboverflow and ovf_checks then
        begin
          current_asmdata.getjumplabel(ok_label);
          { skip the error call when the flags signal "no overflow" }
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,ok_label)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,ok_label);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,ok_label);
        end;
    end;
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { Ensures left.location is a LOC_REGISTER of the operation size.

      opdef  - type the operands are converted to when they are loaded
      opsize - cg size of the operation
      noswap - if true the operand locations must not be exchanged }
    var
      swap_with_reg,
      swap_with_creg,
      is_compare : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          { if right is already in a (constant) register we can simply swap
            the locations instead of loading left }
          swap_with_reg:=(not noswap) and (right.location.loc=LOC_REGISTER);
          swap_with_creg:=(not noswap) and (right.location.loc=LOC_CREGISTER);
          { a comparison does not change the register value, so a constant
            register may be reused for it }
          is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

          if swap_with_reg or swap_with_creg then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          { a swapped-in LOC_REGISTER needs no further loading }
          if not swap_with_reg then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
          if swap_with_creg then
            location:=left.location;
        end;

      { bring operands whose size differs from the operation size into
        registers of the right size; right first, then left }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  procedure tx86addnode.force_left_and_right_fpureg;
    { Loads both operands into LOC_FPUREGISTER (right first, then left).
      If left was already on the fpu stack the operands end up in the
      wrong order, which is recorded by toggling nf_swapped. }
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc<>LOC_FPUREGISTER then
        { left is loaded last, so the nominator is in st0 }
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      else
        { left was on the stack before right => operands are swapped }
        toggleflag(nf_swapped);
    end;
  { Makes both sides suitable for executing an x87 instruction:
    if one side is an OS_F32/OS_F64-sized LOC_REFERENCE/LOC_CREFERENCE it is
    returned in 'refnode' (nil otherwise); everything else is loaded onto the
    FPU stack. nf_swapped is toggled whenever the resulting operand order is
    reversed. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory here;
        below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already live on the fpu stack }
      on_fpu_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case on_fpu_stack of
        0:
          begin
            { neither: load right, keep left in memory if the size allows it }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { exactly one: the other operand is the candidate memory operand }
            refnode:=left;
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable directly from memory }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { has to go onto the fpu stack after all }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    { Emits "op right,left" with left.location.register as destination.
      right may be a register, a memory reference or a constant; subset
      locations are loaded into a register first. Any other location
      triggers internalerror 200203232. }
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly, anything
              outside that range goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    { Returns the condition flags that correspond to the comparison stored in
      nodetype. If nf_swapped is set the operands were exchanged, so the
      mirrored relation is used. 'unsigned' selects the above/below flags
      instead of the greater/less ones. }
    var
      cmp     : tnodetype;
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      cmp:=nodetype;
      { "a < b" evaluated with exchanged operands is "b > a", etc.;
        (un)equality is symmetric and stays as it is }
      if swapped then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
          else
            ;
        end;

      case cmp of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn :
          if unsigned then
            result:=F_B
          else
            result:=F_L;
        lten :
          if unsigned then
            result:=F_BE
          else
            result:=F_LE;
        gtn :
          if unsigned then
            result:=F_A
          else
            result:=F_G;
        gten :
          if unsigned then
            result:=F_AE
          else
            result:=F_GE;
        else
          begin
            { not a comparison node: report it with the error number that
              belongs to the signedness/swapped combination }
            if unsigned then
              begin
                if swapped then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
              end
            else
              begin
                if swapped then
                  internalerror(2013120105)
                else
                  internalerror(2013120106);
              end;
          end;
      end;
    end;
  function tx86addnode.getfpuresflags : tresflags;
    { Returns the floating point condition flags that correspond to the
      comparison stored in nodetype; with nf_swapped set the mirrored
      relation is used. }
    var
      cmp     : tnodetype;
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      cmp:=nodetype;
      { exchange "less" and "greater" if the operands were swapped;
        (un)equality is symmetric }
      if swapped then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
          else
            ;
        end;

      case cmp of
        equaln   : result:=F_FE;
        unequaln : result:=F_FNE;
        ltn      : result:=F_FB;
        lten     : result:=F_FBE;
        gtn      : result:=F_FA;
        gten     : result:=F_FAE;
        else
          begin
            { not a comparison node }
            if swapped then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  427. {*****************************************************************************
  428. AddSmallSet
  429. *****************************************************************************}
  430. {$ifndef i8086}
  procedure tx86addnode.second_addsmallset;
    { Generates code for set operations on small sets (sets that fit into an
      integer register): union, intersection, difference, symmetric
      difference and inclusion of a single element. The result is left in a
      register and stored in 'location'. }
    var
      setbase     : aint;
      opdef       : tdef;
      opsize,
      result_size : TCGSize;
      op          : TAsmOp;
      extra_not,
      noswap,
      swapped,
      all_member_optimization : boolean;
    begin
      pass_left_right;

      extra_not:=false;
      noswap:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      result_size:=int_cgsize(resultdef.size);
      opsize:=result_size;

      { lowest element of the set type involved }
      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              op:=A_OR
            else
              begin
                { we are adding a set element }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { the minuend is in right, the subtrahend in left }
                all_member_optimization:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                { the minuend is in left, the subtrahend in right }
                all_member_optimization:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not all_member_optimization then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>result_size then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(result_size),false);
    end;
  533. {$endif not i8086}
  procedure tx86addnode.second_cmpsmallset;
    { Generates code for comparing two small sets. (Un)equality is a plain
      CMP; for <= and >= one operand is first ANDed with the other and the
      result is then compared for equality. The result is returned in the
      cpu flags. }
    var
      opdef  : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped "<=" or a swapped ">=",
            i.e. exactly when "is swapped" and "is >=" have the same value }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
          emit_op_right_left(A_AND,opsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  569. {*****************************************************************************
  570. AddMMX
  571. *****************************************************************************}
  572. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    { Generates code for add/sub/mul and the bitwise operations on MMX
      operands. The opcode is selected from the node type, the MMX element
      type of the left operand and, for add/sub, the saturation switch.
      The result is returned in an MMX register stored in location.register.

      Raises internalerror 2003042214 for unsupported node types and
      201408201 if no opcode exists for the node type/element type
      combination. }
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { signed saturating subtraction of words, matching the
                      PADDSW used for addition (was the byte variant PSUBSB) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { no opcode for this node type/element type combination }
      if op = A_NOP then
        internalerror(201408201);

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { operands are exchanged: load right into a new register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that is loaded from memory below, so right
                    must be a reference; left is already LOC_MMXREGISTER
                    here, so checking left would always fail }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { cmpop is never set at present, so the left temp is kept }
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  744. {$endif SUPPORT_MMX}
  745. {*****************************************************************************
  746. AddFloat
  747. *****************************************************************************}
  { Generates code for a scalar floating point addn/subn/muln/slashn node whose
    result is placed in an MM register (non-AVX encoding).

    If the selected fpu type is at least SSE3 and the node has the shape
    sqr(a)+sqr(b) or sqr(a)-sqr(b), both squares are computed with one packed
    multiply followed by a horizontal add/subtract.  In all other cases the
    operation is emitted through the generic a_opmm_* code generator helpers. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;
      horiz_op : tasmop;
      fuse_squares,
      both_on_fpu_stack : boolean;
      inner : tnode;

    { There is no direct fpu->mm register copy instruction, so an operand that
      lives in an fpu register is written to memory (instead of being forced
      into an mm register), which probably allows shorter code. }
    procedure spill_fpu_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    begin
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { replace each sqr() node by its argument; the argument is detached
            from the sqr() node before that node is freed }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      both_on_fpu_stack:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used if both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange them manually }
          if both_on_fpu_stack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;

          if is_double(resultdef) then
            begin
              { result register: low quadword = left, high quadword = right }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              { square both quadwords at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPD;
                subn :
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave the low singles: bits 0..31 = left, bits 32..63 = right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { copy the low quadword into the high quadword so that
                bits 64..127 contain valid values as well }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all four singles at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPS;
                subn :
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/sub combines the two squares in the lowest element }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { right may only take the role of the left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          spill_fpu_operand(left);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          spill_fpu_operand(left);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          spill_fpu_operand(right);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates code for a scalar floating point addn/subn/muln/slashn node using
    the three operand a_opmm_*_reg_reg code generator helpers (AVX variant).

    The sqr(a)+/-sqr(b) detection is only compiled when "dummy" is defined, so
    in a regular build the fused path below is never taken.  A multiplication
    by the real constant 2 is turned into an addition of the operand to itself. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      horiz_op : tasmop;
      fuse_squares,
      both_on_fpu_stack : boolean;
{$ifdef dummy}
      inner : tnode;
{$endif dummy}

    { There is no direct fpu->mm register copy instruction, so an operand that
      lives in an fpu register is written to memory (instead of being forced
      into an mm register), which probably allows shorter code. }
    procedure spill_fpu_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    begin
      fuse_squares:=false;
{$ifdef dummy}
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { replace each sqr() node by its argument; the argument is detached
            from the sqr() node before that node is freed }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      both_on_fpu_stack:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used if both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange them manually }
          if both_on_fpu_stack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;

          if is_double(resultdef) then
            begin
              { result register: low quadword = left, high quadword = right }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              { square both quadwords at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPD;
                subn :
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave the low singles: bits 0..31 = left, bits 32..63 = right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { copy the low quadword into the high quadword so that
                bits 64..127 contain valid values as well }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all four singles at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPS;
                subn :
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/sub combines the two squares in the lowest element }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { left*2 -> left+left }
      else if (nodetype=muln) and
              is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
        end
      { 2*right -> right+right }
      else if (nodetype=muln) and
              is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
        end
      { right may only take the role of the left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_fpu_operand(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_fpu_operand(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  { Returns true if a fused multiply-add may be used for this node.
    On i8086 the decision is left to the inherited implementation.  On the
    other targets the result must stay in an xmm register (fiddling with fpu
    registers and fma makes no sense) and the selected cpu type must provide
    FMA or FMA4. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      Result:=
        use_vectorfpu(resultdef) and
        (
          (CPUX86_HAS_FMA in cpu_capabilities[current_settings.cputype]) or
          (CPUX86_HAS_FMA4 in cpu_capabilities[current_settings.cputype])
        );
{$endif i8086}
    end;
  { Generates a scalar floating point comparison with (V)COMISS/(V)COMISD and
    returns the result as LOC_FLAGS.

    Only single and double operands are supported, anything else raises an
    internal error.  If right is already in an mm register it is used as the
    register operand of the compare and nf_swapped is toggled to record the
    reversed operand order; otherwise left is forced into an mm register. }
  procedure tx86addnode.second_cmpfloatvector;
    var
      comi_op : tasmop;
    begin
      { pick the compare instruction matching operand size and encoding }
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            comi_op:=A_VCOMISS
          else
            comi_op:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            comi_op:=A_VCOMISD
          else
            comi_op:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { A direct move fpu->mm register is not possible, so any fpu operand is
        forced to memory.  It is not forced into an mm register because one
        memory location can be used directly by the compare instruction, which
        yields shorter code. }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is the register operand, left is the other operand }
          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(comi_op,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(comi_op,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          { the operands were compared in reversed order }
          toggleflag(nf_swapped);
        end
      else
        begin
          { left becomes the register operand, right is the other operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          case right.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(comi_op,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(comi_op,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for addn/muln/subn/slashn on vector operands that fit into an
    mm register; the result is returned in LOC_MMREGISTER.  Vectors that do not
    fit into an mm register are not supported yet and raise an internal error,
    as does any other node type.

    The size passed to a_opmm_loc_reg is derived from the float type of the
    vector's element def.  Both code paths use the element def: left.resultdef
    is the vector (array) def itself, so it must not be typecast to tfloatdef
    directly. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { size of a single vector element, taken from the element def }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { reuse right's register as the destination }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { left is loaded into an mm register which becomes the destination }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point addn/muln/subn/slashn node.

    Results handled by the vector fpu are delegated to second_addfloatavx or
    second_addfloatsse.  Otherwise the operation is carried out on the x87
    stack: if prepare_x87_locations reports an operand node that is addressed
    through memory, the memory form of the instruction is emitted, else the
    popping register form on ST/ST1 is used.  For subn/slashn the reversed
    instruction is selected when nf_swapped is set.  The result is in ST. }
  procedure tx86addnode.second_addfloat;
    var
      x87op : TAsmOp;
      memnode : tnode;
      mem_operand : boolean;

    { selects the memory form if an operand is taken from memory,
      otherwise the popping stack form }
    function pick(stack_form,mem_form : TAsmOp) : TAsmOp;
      begin
        if mem_operand then
          Result:=mem_form
        else
          Result:=stack_form;
      end;

    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memnode);
      mem_operand:=assigned(memnode);

      case nodetype of
        addn :
          x87op:=pick(A_FADDP,A_FADD);
        muln :
          x87op:=pick(A_FMULP,A_FMUL);
        subn :
          begin
            if nf_swapped in flags then
              x87op:=pick(A_FSUBRP,A_FSUBR)
            else
              x87op:=pick(A_FSUBP,A_FSUB);
          end;
        slashn :
          begin
            if nf_swapped in flags then
              x87op:=pick(A_FDIVRP,A_FDIVR)
            else
              x87op:=pick(A_FDIVP,A_FDIV);
          end;
        else
          internalerror(2003042214);
      end;

      if mem_operand then
        emit_ref(x87op,tcgsize2opsize[memnode.location.size],memnode.location.reference)
      else
        begin
          emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
          { the popping form removes one value from the fpu stack }
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison and returns the result as
    LOC_FLAGS.

    If either operand is handled by the vector fpu the work is delegated to
    second_cmpfloatvector.  Otherwise both operands are forced onto the x87
    stack.  On non-x86_64 targets with a cpu type below Pentium II, FCOMPP is
    used and the fpu status word is transferred to the cpu flags via SAHF; in
    all other cases FCOMIP followed by FSTP is emitted. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword : treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word into a 2 byte temp and load the byte
                at offset 1 of that temp into AH }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              { store the status word directly into AX }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so pop the second one explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1240. {*****************************************************************************
  1241. Add64bit
  1242. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit alu this is ordinary ordinal arithmetic.
    Without one, this method must not be reached (the operation has to be
    implemented separately), so an internal error is raised. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit alu this is an ordinary ordinal comparison.
    Without one, this method must not be reached (the operation has to be
    implemented separately), so an internal error is raised. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1261. {*****************************************************************************
  1262. AddOrdinal
  1263. *****************************************************************************}
  { Generates code for addn/subn/muln/andn/orn/xorn on ordinal operands.

    Two strategies are used:
     - if a register has to be allocated anyway (neither operand is in a
       LOC_REGISTER), or for an addition whose operands are both in a register
       or constant, the three operand code generator helpers are used and the
       result gets its own register;
     - otherwise the operand that already is in a LOC_REGISTER is reused as
       destination of a two operand instruction.

    Overflow checking is requested for addn/subn/muln when neither operand is
    a pointer and cs_check_overflow is active. }
  procedure tx86addnode.second_addordinal;
    var
      opsz : tcgsize;
      cgop : topcg;
      ovcheck,
      three_operand : boolean;
      ovloc : tlocation;
      scratch : TRegister;
    begin
      ovloc.loc:=LOC_VOID;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        xorn :
          cgop:=OP_XOR;
        orn :
          cgop:=OP_OR;
        andn :
          cgop:=OP_AND;
        muln :
          begin
            { the signed multiply is only used if both operands are signed }
            if is_signed(left.resultdef) and is_signed(right.resultdef) then
              cgop:=OP_IMUL
            else
              cgop:=OP_MUL;
          end;
        else
          internalerror(2015022501);
      end;

      { only add, sub and mul are checked for overflow, and never on pointers }
      ovcheck:=
        (nodetype in [addn,subn,muln]) and
        (left.resultdef.typ<>pointerdef) and
        (right.resultdef.typ<>pointerdef) and
        (cs_check_overflow in current_settings.localswitches);

      opsz:=def_cgsize(left.resultdef);
      pass_left_right;

      { do we have to allocate a register? If yes, three operand instructions are better }
      three_operand:=
        (
          (left.location.loc<>LOC_REGISTER) and
          (right.location.loc<>LOC_REGISTER)
        ) or
        (
          (nodetype=addn) and
          (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and
          (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])
        );

      if three_operand then
        begin
          { allocate registers }
          force_reg_left_right(false,true);
          set_result_location_reg;

          if nodetype=subn then
            begin
              { subtraction is a special case since it is not commutative }
              if nf_swapped in flags then
                swapleftright;

              if left.location.loc=LOC_CONSTANT then
                begin
                  { constant minuend: load it into a scratch register first }
                  scratch:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,scratch);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,scratch,location.register,ovcheck,ovloc);
                end
              else if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.value,left.location.register,
                  location.register,ovcheck,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.register,left.location.register,
                  location.register,ovcheck,ovloc);
            end
          else
            begin
              if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  right.location.value,left.location.register,
                  location.register,ovcheck,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  left.location.register,right.location.register,
                  location.register,ovcheck,ovloc);
            end;
        end
      else
        begin
          { at least one location is a register: re-use it, so that two operand
            opcodes can be tried }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc=LOC_REGISTER then
                begin
                  { move the register operand to the left side }
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                { neither operand in a register is handled by the three operand path above }
                Internalerror(2018031102);
            end;

          { at this point, left.location.loc should be LOC_REGISTER }
          if right.location.loc=LOC_REGISTER then
            begin
              if (nodetype=subn) and (nf_swapped in flags) then
                begin
                  { when swapped, the other register receives the result }
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsz,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsz,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right.location<>LOC_REGISTER }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

              if (nodetype=subn) and (nf_swapped in flags) then
                begin
                  { swapped subtraction: load right into a new register and
                    subtract the old left register from it }
                  scratch:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsz);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsz,right.location,left.location.register);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsz,scratch,left.location.register);
                end
              else
                cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsz,right.location,left.location.register);

              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if ovcheck then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
    end;
  { Generates code for a comparison of two ordinal operands; the result is
    returned as LOC_FLAGS.

    A constant right operand is compared directly against a left operand in
    memory.  On x86_64 this shortcut is only taken for 64 bit operands if the
    constant lies within the longint range.  In all other cases left is forced
    into a register and the generic compare code is emitted.  The comparison is
    treated as unsigned as soon as one of the operands is unsigned. }
  procedure tx86addnode.second_cmpordinal;
    var
      opdef : tdef;
      opsz : tcgsize;
      is_unsigned,
      cmp_const_with_mem : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      opdef:=left.resultdef;
      opsz:=def_cgsize(opdef);

      pass_left_right;

      cmp_const_with_mem:=
        (right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
{$ifdef x86_64}
      { for 64 bit operands the constant must fit into the longint range }
      if cmp_const_with_mem and (opsz in [OS_64,OS_S64]) then
        cmp_const_with_mem:=
          (right.location.value>=low(longint)) and
          (right.location.value<=high(longint));
{$endif x86_64}

      if cmp_const_with_mem then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[opsz], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(opdef,opsz,false);
          emit_generic_code(A_CMP,opsz,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: register tx86addnode as the class used to create add nodes }
initialization
  caddnode:=tx86addnode;
end.