nx86add.pas 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add-node code generator for the x86 family.  Derives from the generic
      tcgaddnode and provides the x86 specific operand preparation, instruction
      emission and second_* overrides implemented in this unit. }
    tx86addnode = class(tcgaddnode)
    protected
      { condition-code selection; both honour nf_swapped }
      function getresflags(unsigned : boolean) : tresflags;
      function getfpuresflags : tresflags;

      { operand preparation }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      procedure force_left_and_right_fpureg;
      procedure prepare_x87_locations(out refnode: tnode);

      { instruction emission with left.location.register as destination }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize;AllocFlags:boolean);
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

      { floating point / vector workers }
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      function use_fma : boolean;override;
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
      procedure second_addordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  55. implementation
  56. uses
  57. globtype,globals,
  58. verbose,cutils,compinnr,
  59. cpuinfo,
  60. aasmbase,aasmdata,aasmcpu,
  61. symconst,symdef,
  62. cgobj,hlcgobj,cgx86,cga,cgutils,
  63. tgobj,ncgutil,
  64. ncon,nset,ninl,
  65. defutil;
  66. { Range check must be disabled explicitly as the code serves
  67. on three different architecture sizes }
  68. {$R-}
  69. {*****************************************************************************
  70. Helpers
  71. *****************************************************************************}
  { Emits the integer instruction 'op' for this node.  On entry left.location
    has to be a LOC_REGISTER already (see left_must_be_reg); on exit
    left.location.register holds the result of a non-comparing operation.

      op         - x86 instruction to emit
      opsize     - operand size of the operation
      unsigned   - selects the condition tested by the overflow trap:
                   F_AE (no carry) when true, F_NO (no overflow) when false
      extra_not  - one operand is complemented with NOT before the operation
                   (requested by second_addsmallset for the set difference)
      mboverflow - the operation may overflow; an FPC_OVERFLOW trap is emitted
                   when overflow checking applies to this node

    Overflow checking applies when cs_check_overflow is active, neither operand
    is a pointer and the node is not marked nf_internal.  That single predicate
    (overflowcheck) now drives the INC/DEC/SHL/LEA shortcuts, the reservation
    of the flags register and the trap itself.  Previously the shortcuts and the
    trap tested the raw switch, so a trap could be emitted that read flags which
    were never reserved, and the register-operand paths never reserved the flags
    for the trap at all. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
      overflowcheck: boolean;
      comparison: boolean;
      needflags: boolean;
    begin
      overflowcheck:=(cs_check_overflow in current_settings.localswitches) and
        (left.resultdef.typ<>pointerdef) and
        (right.resultdef.typ<>pointerdef) and
        not(nf_internal in flags);
      comparison:=
        (op=A_CMP) or (op=A_TEST) or (op=A_BT) or is_boolean(resultdef);
      { the flags written by 'op' are consumed afterwards either as the result
        of a comparison or by the overflow trap emitted at the end }
      needflags:=comparison or (mboverflow and overflowcheck);

      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              if needflags then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              if needflags then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              if needflags then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              { the result was computed in r, move it back into left }
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else
                { the shortcuts below are only taken when no overflow trap
                  follows, i.e. when nothing reads the flags afterwards }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(overflowcheck) and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(overflowcheck) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(overflowcheck) then
                  begin
                    { multiplication by 2^power becomes a shift }
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not(overflowcheck) then
                begin
                  { multiplication by 3, 5 or 9: lea (reg,reg,scale),newreg }
                  reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      if needflags then
                        cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    emit_op_right_left(op,opsize,needflags);
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow and overflowcheck then
        begin
          current_asmdata.getjumplabel(hl4);
          { skip the trap when the operation did not overflow }
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
          { after a comparison the flags stay reserved }
          if not comparison then
            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,hl4);
        end;
    end;
  { Makes sure left.location is a LOC_REGISTER and that both operands have
    the size of the operation.

      opdef  - definition the operands are converted to when forced into a register
      opsize - code generator size of the operation
      noswap - when true the operand locations must not be exchanged

    If left is not in a register yet and swapping is allowed, a right operand
    that lives in a (constant) register is exchanged with left and nf_swapped
    is toggled; otherwise left is loaded into a register. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_compare : boolean;
    begin
      { maybe we can reuse a constant register when the operation is a
        comparison that doesn't change the value of the register }
      is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if (not noswap) and
             (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              { the former right operand is now found in left.location; a
                register variable still has to go through location_force_reg }
              if left.location.loc=LOC_CREGISTER then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
                  location:=left.location;
                end;
            end
          else
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
        end;

      { non-constant operands whose size differs from the operation size
        (signedness ignored) are converted via a register }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers.  The right operand is loaded
    first when it is not there yet; nf_swapped is toggled whenever left was
    already an FPU register on entry. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already, so the fpu operands are in the
          wrong order on the stack => swap }
        toggleflag(nf_swapped)
      else
        { the nominator in st0 }
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Prepares both operands for an x87 instruction.

    On return 'refnode' is either nil (both operands are on the FPU stack) or
    the operand that stays in memory: an OS_F32/OS_F64 sized LOC_REFERENCE or
    LOC_CREFERENCE.  Everything else is loaded to the FPU stack.  nf_swapped
    is toggled as needed to record the resulting operand order. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : integer;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory
        here; below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already are FPU registers }
      on_fpu_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case on_fpu_stack of
        0:
          begin
            { right always goes to the stack; left may stay in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { this size cannot be used as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits 'op right,left' with left.location.register as destination.

      op         - instruction to emit
      opsize     - operand size
      AllocFlags - when true the flags register is allocated directly in
                   front of the emitted instruction

    The right operand may be a register, a memory reference or a constant;
    subset locations are loaded into a register first.  Any other location
    raises internal error 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize;AllocFlags:boolean);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}

    { allocates the flags right before the instruction if the caller asked for it }
    procedure maybe_alloc_flags;
      begin
        if AllocFlags then
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
      end;

    begin
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            maybe_alloc_flags;
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            maybe_alloc_flags;
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                { load the constant into a scratch register first }
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                maybe_alloc_flags;
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              begin
                maybe_alloc_flags;
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
              end;
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flag that represents the result of the integer
    comparison described by nodetype.

      unsigned - true selects the above/below conditions, false the
                 greater/less conditions

    Equality tests do not depend on signedness or operand order.  For the
    ordering tests the condition is mirrored when nf_swapped is set. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    begin
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else if unsigned then
        begin
          if nf_swapped in flags then
            begin
              case nodetype of
                ltn : result:=F_A;
                lten : result:=F_AE;
                gtn : result:=F_B;
                gten : result:=F_BE;
                else
                  internalerror(2013120107);
              end;
            end
          else
            begin
              case nodetype of
                ltn : result:=F_B;
                lten : result:=F_BE;
                gtn : result:=F_A;
                gten : result:=F_AE;
                else
                  internalerror(2013120108);
              end;
            end;
        end
      else
        begin
          if nf_swapped in flags then
            begin
              case nodetype of
                ltn : result:=F_G;
                lten : result:=F_GE;
                gtn : result:=F_L;
                gten : result:=F_LE;
                else
                  internalerror(2013120105);
              end;
            end
          else
            begin
              case nodetype of
                ltn : result:=F_L;
                lten : result:=F_LE;
                gtn : result:=F_G;
                gten : result:=F_GE;
                else
                  internalerror(2013120106);
              end;
            end;
        end;
    end;
  { Returns the F_F* condition flag that represents the result of the
    floating point comparison described by nodetype.  The ordering
    conditions are mirrored when nf_swapped is set. }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          begin
            if nf_swapped in flags then
              begin
                case nodetype of
                  ltn : result:=F_FA;
                  lten : result:=F_FAE;
                  gtn : result:=F_FB;
                  gten : result:=F_FBE;
                  else
                    internalerror(2014031402);
                end;
              end
            else
              begin
                case nodetype of
                  ltn : result:=F_FB;
                  lten : result:=F_FBE;
                  gtn : result:=F_FA;
                  gten : result:=F_FAE;
                  else
                    internalerror(2014031403);
                end;
              end;
          end;
      end;
    end;
  457. {*****************************************************************************
  458. AddSmallSet
  459. *****************************************************************************}
  460. {$ifndef i8086}
  { Generates code for the arithmetic operations on small sets (sets that fit
    into an integer register): union or element inclusion (addn), symmetric
    difference, intersection, difference (subn) and the bitwise xorn/orn/andn
    forms.  The result ends up in 'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap,
      swapped,
      all_member_optimization : boolean;

    { exchanges the operand locations back if the node is marked as swapped;
      newly swapped also set swapped flag }
    procedure unswap_locations;
      begin
        if nf_swapped in flags then
          begin
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end;
      end;

    begin
      pass_left_right;

      all_member_optimization:=false;
      extra_not:=false;
      noswap:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { we are adding a set element }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and (not b) }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { the minuend is found in right, the subtrahend in left }
                all_member_optimization:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                all_member_optimization:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if all_member_optimization then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          unswap_locations;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      { can we use the BMI1 instruction andn? }
      else if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
              (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
        begin
          { andn writes a separate destination register }
          location_reset(location,LOC_REGISTER,left.location.size);
          location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
          unswap_locations;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
          if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
            hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
          case left.location.loc of
            LOC_CREGISTER,LOC_REGISTER:
              emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
            LOC_CREFERENCE,LOC_REFERENCE:
              emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
            else
              Internalerror(2018040201);
          end;
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  589. {$endif not i8086}
  { Generates code for comparisons of small sets.  Equality and inequality
    are a plain CMP.  The subset tests (lten/gten) first AND both operands and
    then compare the result against one of them for equality.  The result is
    returned as LOC_FLAGS.

    NOTE(review): the internal error number below is the same one that
    second_addsmallset uses - confirm whether it should be unique. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      if not(nodetype in [equaln,unequaln,lten,gten]) then
        internalerror(2003042215);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped lten and for a swapped gten }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
          emit_op_right_left(A_AND,opsize,False);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the comparison result is delivered in the flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  625. {*****************************************************************************
  626. AddMMX
  627. *****************************************************************************}
  628. {$ifdef SUPPORT_MMX}
  629. procedure tx86addnode.second_opmmx;
  630. var
  631. op : TAsmOp;
  632. cmpop : boolean;
  633. mmxbase : tmmxtype;
  634. hreg,
  635. hregister : tregister;
  636. begin
  637. pass_left_right;
  638. cmpop:=false;
  639. op:=A_NOP;
  640. mmxbase:=mmx_type(left.resultdef);
  641. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  642. case nodetype of
  643. addn :
  644. begin
  645. if (cs_mmx_saturation in current_settings.localswitches) then
  646. begin
  647. case mmxbase of
  648. mmxs8bit:
  649. op:=A_PADDSB;
  650. mmxu8bit:
  651. op:=A_PADDUSB;
  652. mmxs16bit,mmxfixed16:
  653. op:=A_PADDSW;
  654. mmxu16bit:
  655. op:=A_PADDUSW;
  656. end;
  657. end
  658. else
  659. begin
  660. case mmxbase of
  661. mmxs8bit,mmxu8bit:
  662. op:=A_PADDB;
  663. mmxs16bit,mmxu16bit,mmxfixed16:
  664. op:=A_PADDW;
  665. mmxs32bit,mmxu32bit:
  666. op:=A_PADDD;
  667. end;
  668. end;
  669. end;
  670. muln :
  671. begin
  672. case mmxbase of
  673. mmxs16bit,mmxu16bit:
  674. op:=A_PMULLW;
  675. mmxfixed16:
  676. op:=A_PMULHW;
  677. end;
  678. end;
  679. subn :
  680. begin
  681. if (cs_mmx_saturation in current_settings.localswitches) then
  682. begin
  683. case mmxbase of
  684. mmxs8bit:
  685. op:=A_PSUBSB;
  686. mmxu8bit:
  687. op:=A_PSUBUSB;
  688. mmxs16bit,mmxfixed16:
  689. op:=A_PSUBSB;
  690. mmxu16bit:
  691. op:=A_PSUBUSW;
  692. end;
  693. end
  694. else
  695. begin
  696. case mmxbase of
  697. mmxs8bit,mmxu8bit:
  698. op:=A_PSUBB;
  699. mmxs16bit,mmxu16bit,mmxfixed16:
  700. op:=A_PSUBW;
  701. mmxs32bit,mmxu32bit:
  702. op:=A_PSUBD;
  703. end;
  704. end;
  705. end;
  706. xorn:
  707. op:=A_PXOR;
  708. orn:
  709. op:=A_POR;
  710. andn:
  711. op:=A_PAND;
  712. else
  713. internalerror(2003042214);
  714. end;
  715. if op = A_NOP then
  716. internalerror(201408201);
  717. { left and right no register? }
  718. { then one must be demanded }
  719. if (left.location.loc<>LOC_MMXREGISTER) then
  720. begin
  721. if (right.location.loc=LOC_MMXREGISTER) then
  722. begin
  723. location_swap(left.location,right.location);
  724. toggleflag(nf_swapped);
  725. end
  726. else
  727. begin
  728. { register variable ? }
  729. if (left.location.loc=LOC_CMMXREGISTER) then
  730. begin
  731. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  732. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  733. end
  734. else
  735. begin
  736. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  737. internalerror(200203245);
  738. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  739. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  740. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  741. end;
  742. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  743. left.location.register:=hregister;
  744. end;
  745. end;
  746. { at this point, left.location.loc should be LOC_MMXREGISTER }
  747. if right.location.loc<>LOC_MMXREGISTER then
  748. begin
  749. if (nodetype=subn) and (nf_swapped in flags) then
  750. begin
  751. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  752. if right.location.loc=LOC_CMMXREGISTER then
  753. begin
  754. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  755. emit_reg_reg(op,S_NO,left.location.register,hreg);
  756. end
  757. else
  758. begin
  759. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  760. internalerror(200203247);
  761. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  762. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  763. emit_reg_reg(op,S_NO,left.location.register,hreg);
  764. end;
  765. location.register:=hreg;
  766. end
  767. else
  768. begin
  769. if (right.location.loc=LOC_CMMXREGISTER) then
  770. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  771. else
  772. begin
  773. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  774. internalerror(200203246);
  775. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  776. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  777. end;
  778. location.register:=left.location.register;
  779. end;
  780. end
  781. else
  782. begin
  783. { right.location=LOC_MMXREGISTER }
  784. if (nodetype=subn) and (nf_swapped in flags) then
  785. begin
  786. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  787. location_swap(left.location,right.location);
  788. toggleflag(nf_swapped);
  789. end
  790. else
  791. begin
  792. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  793. end;
  794. location.register:=left.location.register;
  795. end;
  796. location_freetemp(current_asmdata.CurrAsmList,right.location);
  797. if cmpop then
  798. location_freetemp(current_asmdata.CurrAsmList,left.location);
  799. end;
  800. {$endif SUPPORT_MMX}
  801. {*****************************************************************************
  802. AddFloat
  803. *****************************************************************************}
  { Generates SSE code for a scalar floating point addn/subn/muln/slashn node.
    The result location is always reset to LOC_MMREGISTER.
    When the fpu type is SSE3 or newer and the node has the shape
    sqr(a)+sqr(b) or sqr(a)-sqr(b), both sqr() nodes are stripped and the
    value is computed with one packed multiply followed by a horizontal
    add/subtract. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;
      fuse_squares,
      both_on_fpu_stack : boolean;
      sqr_arg : tnode;
    begin
      fuse_squares:=false;
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;

          { detach the argument of the left sqr() and let it replace the sqr() node }
          sqr_arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=sqr_arg;

          { same for the right operand }
          sqr_arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=sqr_arg;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpu_stack:=(left.location.loc=LOC_FPUREGISTER) and
                         (right.location.loc=LOC_FPUREGISTER);
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          if both_on_fpu_stack then
            { swapleftright would change nothing because both operands
              are "R_ST", so exchange the stack entries manually }
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { translate the node type into the generic code generator operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding the left operand becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { pack both doubles into one register, square them with one
                packed multiply and combine the two squares horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..31 and 64..95 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      { right may only act as the first operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an operand living in an fpu register is spilled to memory instead
            of being moved to an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            yields shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { spill an fpu register operand to memory (no direct fpu->mm
            register copy instruction exists) }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          { load left into a fresh result register ... }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { ... and apply the operation with right, again spilling an fpu
            register operand to memory first }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates AVX code for a scalar floating point addn/subn/muln/slashn node
    using the three operand forms of the code generator; the result location
    is always reset to LOC_MMREGISTER.
    A multiplication by the real constant 2 is emitted as an addition of the
    other operand to itself.
    The sqr(a)+/-sqr(b) detection is only compiled when the symbol "dummy" is
    defined; otherwise fuse_squares stays false and that branch is never taken. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      fuse_squares,
      both_on_fpu_stack : boolean;
{$ifdef dummy}
      sqr_arg : tnode;
{$endif dummy}
    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;

          { replace each sqr() node by its argument }
          sqr_arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=sqr_arg;

          sqr_arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=sqr_arg;
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpu_stack:=(left.location.loc=LOC_FPUREGISTER) and
                         (right.location.loc=LOC_FPUREGISTER);
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          if both_on_fpu_stack then
            { swapleftright would change nothing because both operands
              are "R_ST", so exchange the stack entries manually }
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { translate the node type into the generic code generator operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding the left operand becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..31 and 64..95 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      { left*2: emit left+left }
      else if (nodetype=muln) and is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,
            left.location.register,
            location.register,
            mms_movescalar);
        end
      { 2*right: emit right+right }
      else if (nodetype=muln) and is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      { right may only act as the register operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand living in an fpu register is spilled to memory instead
            of being moved to an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            yields shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { spill an fpu register operand to memory (no direct fpu->mm
            register copy instruction exists) }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Tells whether fused multiply-add may be used for this node.
    On non-i8086 targets this requires that the result is handled by the
    vector fpu and that the selected cpu type advertises FMA or FMA4;
    on i8086 the decision is left to the inherited implementation. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifndef i8086}
      { the result has to stay in an xmm register, mixing fpu registers
        and fma makes no sense }
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=(CPUX86_HAS_FMA in cpu_capabilities[current_settings.cputype]) or
                (CPUX86_HAS_FMA4 in cpu_capabilities[current_settings.cputype]);
{$else i8086}
      Result:=inherited use_fma;
{$endif i8086}
    end;
  { Generates a scalar floating point comparison with (V)COMISS/(V)COMISD.
    The result location is LOC_FLAGS with the flags taken from
    getfpuresflags.  When right is already in an mm register it is used as
    the register operand and nf_swapped is toggled to record the reversed
    operand order. }
  procedure tx86addnode.second_cmpfloatvector;
    var
      cmp_op : tasmop;
    begin
      { pick the compare instruction from the operand type; the VEX encoded
        form is used when AVX is enabled }
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            cmp_op:=A_VCOMISS
          else
            cmp_op:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            cmp_op:=A_VCOMISD
          else
            cmp_op:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is the register operand, left is the source operand }
          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmp_op,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmp_op,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          { the operands were compared in reversed order }
          toggleflag(nf_swapped);
        end
      else
        begin
          { left becomes the register operand, right is the source operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          case right.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmp_op,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmp_op,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for addn/muln/subn/slashn on vector operands that fit
    into an mm register; the result location is LOC_MMREGISTER.  Operands
    that do not fit into an mm register are not supported yet and raise an
    internal error.

    The operation size passed to the code generator is the size of the
    vector's element type.  Both code paths derive it from
    tarraydef(left.resultdef).elementdef; the commutative path used to cast
    left.resultdef itself to tfloatdef, which reads an unrelated field when
    left.resultdef is an array def.
    NOTE(review): this assumes left.resultdef is always an array def with a
    float element type here, as the non-commutative path already did -
    confirm against the callers. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of a single vector element, shared by both branches below }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { reuse the register of right as result register }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { load left into an mm register and reuse it as result register }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point addn/muln/subn/slashn node.
    Results handled by the vector fpu are delegated to the AVX or SSE
    variant.  Otherwise x87 code is emitted: if prepare_x87_locations
    returns a node, the instruction takes that node's memory reference as
    operand; if not, the popping form working on ST/ST1 is used and the fpu
    stack counter is decremented.  The result is left in ST. }
  procedure tx86addnode.second_addfloat;
    const
      { index false: popping form, index true: form with a memory operand }
      ops_add  : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      ops_mul  : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      ops_sub  : array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      ops_rsub : array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      ops_div  : array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      ops_rdiv : array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      x87op : TAsmOp;
      memnode : tnode;
      use_mem,
      reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memnode);
      use_mem:=assigned(memnode);
      { only relevant for the non-commutative operations }
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          x87op:=ops_add[use_mem];
        muln :
          x87op:=ops_mul[use_mem];
        subn :
          begin
            if reversed then
              x87op:=ops_rsub[use_mem]
            else
              x87op:=ops_sub[use_mem];
          end;
        slashn :
          begin
            if reversed then
              x87op:=ops_rdiv[use_mem]
            else
              x87op:=ops_div[use_mem];
          end;
        else
          internalerror(2003042214);
      end;

      if not use_mem then
        begin
          emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(x87op,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison; the result location is
    LOC_FLAGS with the flags taken from getfpuresflags.
    If either operand is handled by the vector fpu the work is delegated to
    second_cmpfloatvector.  Otherwise both operands are forced into fpu
    registers and compared:
      - on non-x86_64 targets with a cpu type below Pentium2: FCOMPP, then
        the fpu status word is transferred into the cpu flags with SAHF
      - otherwise: FCOMIP followed by FSTP }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword : treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and remove both operands from the fpu stack }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word in a 2 byte temp and load its upper
                byte into AH }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              { store the status word directly in AX }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so drop the second one explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1296. {*****************************************************************************
  1297. Add64bit
  1298. *****************************************************************************}
  { 64 bit arithmetic.  When the compiler is built with cpu64bitalu defined,
    this is handled by the ordinal code path.  Otherwise this generic x86
    implementation raises an internal error because the operation has to be
    implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifdef cpu64bitalu}
      { 64 bit operands are plain ordinals here }
      second_addordinal;
{$else cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$endif cpu64bitalu}
    end;
  { 64 bit comparison.  When the compiler is built with cpu64bitalu defined,
    this is handled by the ordinal comparison.  Otherwise this generic x86
    implementation raises an internal error because the comparison has to be
    implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifdef cpu64bitalu}
      { 64 bit operands are plain ordinals here }
      second_cmpordinal;
{$else cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$endif cpu64bitalu}
    end;
  1317. {*****************************************************************************
  1318. AddOrdinal
  1319. *****************************************************************************}
  { Generates code for addn/subn/muln/xorn/orn/andn on ordinal operands.
    Two strategies are used:
      - if neither operand is in a LOC_REGISTER location, or for an addition
        whose operands are both registers/constants, the operands are forced
        into registers and a three operand operation writes into a fresh
        result register;
      - otherwise the operand that is already in a register is reused as
        destination of a two operand operation.
    Overflow checking is emitted for addn/muln/subn when overflow checks are
    enabled and neither operand is a pointer. }
  procedure tx86addnode.second_addordinal;
    var
      opsize : tcgsize;
      unsigned_op,
      need_ovcheck : boolean;
      cgop : topcg;
      ovf_loc : tlocation;
      scratchreg : TRegister;
    begin
      { the operation is unsigned as soon as one operand is unsigned }
      unsigned_op:=not(is_signed(left.resultdef)) or
                   not(is_signed(right.resultdef));

      ovf_loc.loc:=LOC_VOID;

      { translate the node type into the generic code generator operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          begin
            if unsigned_op then
              cgop:=OP_MUL
            else
              cgop:=OP_IMUL;
          end;
        xorn :
          cgop:=OP_XOR;
        orn :
          cgop:=OP_OR;
        andn :
          cgop:=OP_AND;
        else
          internalerror(2015022501);
      end;

      { only add, mul and sub can overflow; pointer arithmetic is never checked }
      need_ovcheck:=
        (nodetype in [addn,muln,subn]) and
        (left.resultdef.typ<>pointerdef) and
        (right.resultdef.typ<>pointerdef) and
        (cs_check_overflow in current_settings.localswitches);

      opsize:=def_cgsize(left.resultdef);

      pass_left_right;

      { does a register have to be allocated? If yes, then three operand instructions are better }
      if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER)) or
         ((nodetype=addn) and
          (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and
          (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
        begin
          { allocate registers }
          force_reg_left_right(false,true);
          set_result_location_reg;

          if nodetype=subn then
            begin
              { subtraction is not commutative, so restore the original operand order first }
              if nf_swapped in flags then
                swapleftright;

              if left.location.loc=LOC_CONSTANT then
                begin
                  { constant minuend: materialize it in a scratch register }
                  scratchreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,scratchreg);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,scratchreg,location.register,need_ovcheck,ovf_loc);
                end
              else if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.value,left.location.register,
                  location.register,need_ovcheck,ovf_loc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.register,left.location.register,
                  location.register,need_ovcheck,ovf_loc);
            end
          else
            begin
              if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  right.location.value,left.location.register,
                  location.register,need_ovcheck,ovf_loc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  left.location.register,right.location.register,
                  location.register,need_ovcheck,ovf_loc);
            end;
        end
      else
        begin
          { at least one location is a register, re-use it, so we can try two operand opcodes }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc<>LOC_REGISTER then
                begin
                  { excluded by the condition of the enclosing if statement;
                    loading left into a temporary register used to be done here }
                  Internalerror(2018031102);
                end
              else
                begin
                  { make the register operand the left one and remember the swap }
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end;
            end;

          { at this point, left.location.loc should be LOC_REGISTER }
          if right.location.loc=LOC_REGISTER then
            begin
              if (nodetype=subn) and (nf_swapped in flags) then
                begin
                  { swapped subtraction: the other register receives the
                    result, so undo the swap afterwards }
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right.location<>LOC_REGISTER }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

              if (nodetype=subn) and (nf_swapped in flags) then
                begin
                  { swapped subtraction: load right into a new register and
                    subtract the old left register from it }
                  scratchreg:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,scratchreg,left.location.register);
                end
              else
                cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);

              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          { the (possibly replaced) left register holds the result }
          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if need_ovcheck then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovf_loc);
    end;
  { Generates code for a comparison of ordinal operands; the result location
    is LOC_FLAGS with the flags selected by getresflags.
    A constant right operand is compared directly against a left operand in
    memory.  On x86_64 this shortcut is only taken for 64 bit operands when
    the constant lies within the longint range.  In all other cases left is
    forced into a register and the generic CMP code is emitted. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      unsigned_cmp : boolean;
    begin
      { the comparison is unsigned as soon as one operand is unsigned }
      unsigned_cmp:=not(is_signed(left.resultdef)) or
                    not(is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      if (right.location.loc=LOC_CONSTANT) and
         (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
         and ((not (cmpsize in [OS_64,OS_S64])) or (
               (right.location.value>=low(longint)) and (right.location.value<=high(longint))
             ))
{$endif x86_64}
      then
        begin
          { compare the constant directly with the memory operand }
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,unsigned_cmp,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned_cmp);
    end;
  { unit initialization: let caddnode refer to the x86 add node class }
  begin
    caddnode:=tx86addnode;
  end.