nx86add.pas 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add/compare node code generator shared by the i386 and x86 targets. }
  tx86addnode = class(tcgaddnode)
  protected
    { Translates the node's comparison type into integer condition flags;
      honours nf_swapped, and 'unsigned' selects the above/below family
      instead of greater/less. }
    function getresflags(unsigned : boolean) : tresflags;

    { Same mapping for floating point compares (F_Fxx flags). }
    function getfpuresflags : tresflags;

    { Ensures left.location is a LOC_REGISTER, swapping with right when that
      is allowed (noswap=false) and right already is a register; afterwards
      both non-constant sides are brought to the size of 'opsize'. }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);

    { Loads both operands into FPU registers and keeps nf_swapped in sync
      with the resulting order. }
    procedure force_left_and_right_fpureg;

    { Prepares operands for an x87 instruction; an OS_F32/OS_F64 memory
      operand is handed back through 'refnode', everything else is loaded
      to the FPU stack. }
    procedure prepare_x87_locations(out refnode: tnode);

    { Emits 'op right,left' where left is a register and right may be a
      register, a memory reference or a constant. }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);

    { Emits the integer operation for the node, including the optional
      overflow check when 'mboverflow' is set. }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  54. implementation
  55. uses
  56. globtype,globals,systems,
  57. verbose,cutils,
  58. cpuinfo,
  59. aasmbase,aasmtai,aasmdata,aasmcpu,
  60. symconst,symdef,
  61. cgobj,hlcgobj,cgx86,cga,cgutils,
  62. paramgr,tgobj,ncgutil,
  63. ncon,nset,ninl,
  64. defutil;
  65. {*****************************************************************************
  66. Helpers
  67. *****************************************************************************}
  { Emits the integer instruction 'op' for this node.

    On entry left.location is expected to be a LOC_REGISTER; on exit
    left.location.register holds the result.
      op         - x86 opcode to emit
      opsize     - operand size
      unsigned   - picks the flag tested by the overflow check (F_AE vs F_NO)
      extra_not  - the second operand has to be complemented first (used for
                   set difference, where op is A_AND)
      mboverflow - the operation may overflow; an overflow check is emitted
                   when cs_check_overflow is active }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shift          : longint;
      skip_trap      : tasmlabel;
      scratch        : Tregister;
      lea_ref        : treference;
      ok_flag        : tresflags;
      reversed_sub,
      right_is_const,
      trap_overflow  : boolean;
    begin
      { a subtraction whose operands were exchanged needs special care,
        because SUB is not commutative }
      reversed_sub:=(nf_swapped in flags) and (nodetype=subn);
      trap_overflow:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              { compute into right's register, then make it the new left }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for commutative operations the locations are exchanged first }
              case op of
                A_ADD,A_OR,A_AND,A_XOR,A_IMUL:
                  location_swap(left.location,right.location);
                else
                  ;
              end;
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if reversed_sub then
        begin
          { right is not a register: load it into a scratch register, operate
            there and copy the result back into left's register }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,scratch);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      else
        begin
          right_is_const:=right.location.loc=LOC_CONSTANT;

          if (op=A_CMP) and
             right_is_const and
             (nodetype in [equaln,unequaln]) and
             (right.location.value=0) then
            begin
              { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                spilling, while 'test %reg,%reg' still requires loading into register.
                If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                peephole optimizer (this optimization is currently available only for i386). }
{$ifdef i386}
              emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register);
{$else i386}
              emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
            end
          else if ((op=A_ADD) or (op=A_SUB)) and
                  right_is_const and
                  (right.location.value=1) and
                  not(trap_overflow) and
                  UseIncDec then
            begin
              { +1 / -1 become INC / DEC }
              if op=A_ADD then
                emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
              else
                emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
            end
          else if (op=A_IMUL) and
                  right_is_const and
                  ispowerof2(int64(right.location.value),shift) and
                  not(trap_overflow) then
            begin
              { multiplication by 2^n becomes a shift }
              emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shift,left.location.register);
            end
          else if (op=A_IMUL) and
                  right_is_const and
                  (right.location.value>1) and
                  ispowerof2(int64(right.location.value)-1,shift) and
                  (shift in [1..3]) and
                  not(trap_overflow) then
            begin
              { multiplication by 3, 5 or 9 becomes 'lea (reg,reg,2/4/8),newreg' }
              reference_reset_base(lea_ref,left.location.register,0,0);
              lea_ref.index:=left.location.register;
              lea_ref.scalefactor:=int64(right.location.value)-1;
              left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],lea_ref,left.location.register));
            end
          else if extra_not then
            begin
              { complement a copy of right, then AND it into left }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,scratch);
              emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
              emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
            end
          else
            emit_op_right_left(op,opsize);
        end;

      { The overflow check has to be emitted right here, directly behind the
        operation, because only this routine knows the signedness. }
      if mboverflow and trap_overflow then
        begin
          current_asmdata.getjumplabel(skip_trap);
          if unsigned then
            ok_flag:=F_AE
          else
            ok_flag:=F_NO;
          cg.a_jmp_flags(current_asmdata.CurrAsmList,ok_flag,skip_trap);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,skip_trap);
        end;
    end;
  { Makes sure left.location ends up in a register.

    If left is not a LOC_REGISTER but right is, and swapping is permitted
    (noswap=false), the two locations are exchanged and nf_swapped toggled.
    Otherwise left is forced into a register. Afterwards every non-constant
    side whose size differs from 'opsize' (ignoring signedness) is forced
    into a register of type 'opdef'. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      wanted : TCGSize;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if (right.location.loc=LOC_REGISTER) and not(noswap) then
            begin
              { right already lives in a register: just exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { a constant register may be reused when the operation is a
                comparison, since that does not modify the register }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,
                nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
            end;
        end;

      wanted:=tcgsize2unsigned[opsize];

      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>wanted) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>wanted) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers.

    Right is loaded first when it is not yet an FPU register, then left.
    Whenever left already was an FPU register, nf_swapped is toggled:
    the operands then sit in the opposite order on the FPU stack. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      else
        { left was on the stack already => operands are in the wrong order }
        toggleflag(nf_swapped);
    end;
  { Makes both sides suitable for executing an x87 instruction.

    If one side is an OS_F32/OS_F64-sized LOC_REFERENCE/LOC_CREFERENCE, that
    node is returned in 'refnode' so it can be used as a memory operand;
    everything else is loaded onto the FPU stack. 'refnode' is nil when both
    operands end up on the stack. nf_swapped is toggled as needed to track
    the resulting operand order. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_stack : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory here;
        below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already live on the FPU stack }
      on_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);

      case on_stack of
        0:
          begin
            { neither is loaded: right always goes onto the stack }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                { left can serve as memory operand }
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the side that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits 'op right,left' with left.location.register as destination.

    Right may be a (constant) register, a memory reference or a constant;
    subset locations are first forced into a register. Any other location
    kind raises internal error 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      list : TAsmList;
{$ifdef x86_64}
      immreg : tregister;
{$endif x86_64}
    begin
      list:=current_asmdata.CurrAsmList;

      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(list,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          list.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(list,right.location.reference);
            list.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly,
              wider values go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                immreg:=cg.getintregister(list,opsize);
                cg.a_load_const_reg(list,opsize,right.location.value,immreg);
                list.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],immreg,left.location.register));
              end
            else
{$endif x86_64}
              list.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flags that represent the result of the integer
    comparison described by nodetype.

    equaln/unequaln map to F_E/F_NE. For the ordering comparisons the
    relation is mirrored when nf_swapped is set; 'unsigned' selects the
    below/above flags instead of less/greater. Any other node type raises
    an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
      rel     : tnodetype;
    begin
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else
        begin
          swapped:=nf_swapped in flags;

          { with exchanged operands, a<b has to be tested as b>a and so on }
          rel:=nodetype;
          if swapped then
            case nodetype of
              ltn  : rel:=gtn;
              lten : rel:=gten;
              gtn  : rel:=ltn;
              gten : rel:=lten;
              else
                ;
            end;

          if unsigned then
            case rel of
              ltn  : result:=F_B;
              lten : result:=F_BE;
              gtn  : result:=F_A;
              gten : result:=F_AE;
              else
                if swapped then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
            end
          else
            case rel of
              ltn  : result:=F_L;
              lten : result:=F_LE;
              gtn  : result:=F_G;
              gten : result:=F_GE;
              else
                if swapped then
                  internalerror(2013120105)
                else
                  internalerror(2013120106);
            end;
        end;
    end;
  { Returns the floating point condition flags (F_Fxx) for the comparison
    described by nodetype; ordering relations are mirrored when nf_swapped
    is set. Unsupported node types raise an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      swapped : boolean;
      rel     : tnodetype;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          begin
            swapped:=nf_swapped in flags;

            { exchanged operands: test the mirrored relation }
            rel:=nodetype;
            if swapped then
              case nodetype of
                ltn  : rel:=gtn;
                lten : rel:=gten;
                gtn  : rel:=ltn;
                gten : rel:=lten;
                else
                  ;
              end;

            case rel of
              ltn  : result:=F_FB;
              lten : result:=F_FBE;
              gtn  : result:=F_FA;
              gten : result:=F_FAE;
              else
                if swapped then
                  internalerror(2014031402)
                else
                  internalerror(2014031403);
            end;
          end;
      end;
    end;
  415. {*****************************************************************************
  416. AddSmallSet
  417. *****************************************************************************}
  418. {$ifndef i8086}
  { Generates code for operations on small sets (sets handled as integers).

    addn with a set element on the right becomes BTS, other additions OR;
    symdifn/xorn become XOR, muln/andn AND, orn OR. Set difference (subn)
    is an AND with the complement of the subtrahend; if the minuend is the
    constant -1 the whole operation is a plain NOT. The result is left in
    'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      base        : aint;
      workdef     : tdef;
      worksize    : TCGSize;
      instr       : TAsmOp;
      complement,
      keep_order,
      just_not    : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      complement:=false;
      just_not:=false;
      workdef:=resultdef;
      worksize:=int_cgsize(workdef.size);

      if left.resultdef.typ=setdef then
        base:=tsetdef(left.resultdef).setbase
      else
        base:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              instr:=A_OR
            else
              begin
                { adding a single element: no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, so widen byte sized sets }
                if worksize=OS_8 then
                  begin
                    worksize:=OS_32;
                    workdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,workdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,workdef,right.location,base);
                instr:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          instr:=A_XOR;
        muln,
        andn :
          instr:=A_AND;
        orn :
          instr:=A_OR;
        subn :
          begin
            instr:=A_AND;
            if nf_swapped in flags then
              begin
                { operands are exchanged: right is the minuend, left the subtrahend }
                just_not:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  complement:=true;
              end
            else
              begin
                just_not:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  complement:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not just_not then
        begin
          { left must be a register }
          left_must_be_reg(workdef,worksize,keep_order);
          emit_generic_code(instr,worksize,true,complement,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[worksize],right.location.register);
          location:=right.location;
        end;

      { undo the widening done above because of the missing btsb }
      if worksize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,workdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  521. {$endif not i8086}
  { Generates code for comparisons of small sets.

    equaln/unequaln are a plain CMP. For lten/gten the operands are arranged
    (swapping when needed), left is ANDed with right, and the result is then
    compared for equality. The outcome is returned as LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef  : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { swap for "lten, not swapped" and for "gten, swapped" }
          if (nodetype=lten)<>(nf_swapped in flags) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  557. {*****************************************************************************
  558. AddMMX
  559. *****************************************************************************}
  560. {$ifdef SUPPORT_MMX}
  { Generates code for MMX vector operations (add, multiply, subtract and the
    bitwise xor/or/and).

    The opcode is chosen from the node type, the MMX element type of the left
    operand and, for add/sub, the cs_mmx_saturation switch. Combinations
    without a matching opcode raise internal error 201408201, unsupported
    node types internal error 2003042214. The result is returned in an MMX
    register (LOC_MMXREGISTER).

    Fixes compared to the previous version:
      - saturating subtraction of signed 16 bit / fixed16 elements used the
        byte instruction PSUBSB; it now uses the word instruction PSUBSW,
        matching PADDSW in the addition branch;
      - in the swapped-subtraction path the sanity check preceding the use of
        right.location.reference tested left.location (which is always an
        MMX register at that point, so the check always failed); it now
        tests right.location;
      - the local 'cmpop' was constant false and has been removed together
        with the dead code depending on it. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { A_NOP marks "no suitable opcode found" }
      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word variant }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      if op = A_NOP then
        internalerror(201408201);

      { left and right no register? }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a fresh register and
                subtract left from it, the fresh register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is read through its reference below, so it is
                    right (not left) that must be a memory location }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { compute into right's register, then make it the new left }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  732. {$endif SUPPORT_MMX}
  733. {*****************************************************************************
  734. AddFloat
  735. *****************************************************************************}
  { Generates SSE code for a scalar floating point add/sub/mul/div node.
    The result is always delivered in a LOC_MMREGISTER.

    Special case: when the selected fpu type is at least SSE3 and the node is
    sqr(a)+sqr(b) or sqr(a)-sqr(b), both sqr() inline nodes are removed and
    the expression is computed with one packed multiply followed by a
    horizontal add/subtract. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop         : topcg;
      horiz_op     : tasmop;
      fuse_squares,
      both_on_x87  : boolean;
      inner        : tnode;
      list         : TAsmList;
    begin
      { relies on short-circuit evaluation: the tinlinenode casts are only
        evaluated once the node type has been checked }
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);

      if fuse_squares then
        begin
          { replace every sqr(x) operand by x itself; the squaring is done
            by the packed multiply emitted below }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;
      list:=current_asmdata.CurrAsmList;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          if both_on_x87 then
            { swapleftright would not help here: both operands are "R_ST",
              so nothing would change -> exchange the stack entries manually }
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(list,right.location,right.resultdef,true);
          { the register holding left doubles as the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { combine both operands in the result register and square
                them with a single packed multiply }
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              list.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPD;
                subn :
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              list.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              list.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPS;
                subn :
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/subtract of the squared operands }
          list.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(list,location.size);
          cg.a_loadmm_reg_reg(list,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an x87 operand is written to memory instead of being forced into
            an mm register: there is no direct fpu->mm register copy
            instruction, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,left.location,left.resultdef);
          cg.a_opmm_loc_reg(list,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { x87 operands go through memory, see the reasoning above }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,left.location,left.resultdef);
          location.register:=cg.getmmregister(list,location.size);
          cg.a_loadmm_loc_reg(list,location.size,left.location,location.register,mms_movescalar);

          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,right.location,right.resultdef);
          cg.a_opmm_loc_reg(list,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates AVX code for a scalar floating point add/sub/mul/div node.
    The result is always delivered in a LOC_MMREGISTER.

    The sqr(a)+/-sqr(b) fusion is only compiled in when the conditional
    `dummy` is defined; otherwise fuse_squares stays false and that branch is
    never taken.  A multiplication by the real constant 2 is turned into an
    addition of the other operand with itself. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop         : topcg;
      horiz_op     : tasmop;
      fuse_squares,
      both_on_x87  : boolean;
      list         : TAsmList;
{$ifdef dummy}
      inner        : tnode;
{$endif dummy}

    { true if n is a real constant node holding a valid float equal to 2 }
    function is_real_two(n : tnode) : boolean;
      begin
        result:=is_constrealnode(n) and
                is_number_float(trealconstnode(n).value_real) and
                (trealconstnode(n).value_real=2);
      end;

    { puts operand+operand into a newly allocated mm register that becomes
      the result register of the node }
    procedure emit_doubling(operand : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,operand.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,operand.location,operand.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          operand.location.register,
          operand.location.register,
          location.register,
          mms_movescalar);
      end;

    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;

          { replace every sqr(x) operand by x itself }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;
      list:=current_asmdata.CurrAsmList;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          if both_on_x87 then
            { swapleftright would not help here: both operands are "R_ST",
              so nothing would change -> exchange the stack entries manually }
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(list,right.location,right.resultdef,true);
          { the register holding left doubles as the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              list.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPD;
                subn :
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              list.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              list.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn :
                  horiz_op:=A_HADDPS;
                subn :
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/subtract of the squared operands }
          list.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { left*2 ? }
      else if (nodetype=muln) and is_real_two(right) then
        emit_doubling(left)
      { right*2 ? }
      else if (nodetype=muln) and is_real_two(left) then
        emit_doubling(right)
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(list,left.location.size);
          { an x87 operand is written to memory instead of being forced into
            an mm register: there is no direct fpu->mm register copy
            instruction, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(list,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(list,left.location.size);
          { x87 operands go through memory, see the reasoning above }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(list,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Tells whether fused multiply-add should be used for this node.
    On non-i8086 targets this is the case when the result type is handled by
    the vector fpu and the selected cpu type advertises FMA or FMA4; on i8086
    the decision is left to the inherited implementation. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      { only worthwhile if the result stays in an xmm register; mixing fpu
        registers and fma makes no sense }
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=(CPUX86_HAS_FMA in cpu_capabilities[current_settings.cputype]) or
                (CPUX86_HAS_FMA4 in cpu_capabilities[current_settings.cputype]);
{$endif i8086}
    end;
  { Compares two single or double operands with (V)COMISS/(V)COMISD and
    returns the outcome as LOC_FLAGS.

    If right already sits in an mm register it is used as the register
    operand and nf_swapped is toggled so that the flags are interpreted for
    the exchanged operand order; otherwise left is forced into an mm
    register. }
  procedure tx86addnode.second_cmpfloatvector;
    var
      compare_ins  : tasmop;
      regside,
      otherside    : tnode;
      badloc_error : longint;
      right_in_reg : boolean;
      list         : TAsmList;
    begin
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            compare_ins:=A_VCOMISS
          else
            compare_ins:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            compare_ins:=A_VCOMISD
          else
            compare_ins:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;
      list:=current_asmdata.CurrAsmList;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu
        operands to memory (not to mm registers, because one of the memory
        locations can be used directly in the compare instruction, yielding
        shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(list,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(list,right.location,right.resultdef);

      { decide which operand supplies the register of the compare }
      right_in_reg:=right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER];
      if right_in_reg then
        begin
          regside:=right;
          otherside:=left;
          badloc_error:=200402221;
        end
      else
        begin
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          regside:=left;
          otherside:=right;
          badloc_error:=200402223;
        end;

      case otherside.location.loc of
        LOC_REFERENCE,LOC_CREFERENCE:
          begin
            tcgx86(cg).make_simple_ref(list,otherside.location.reference);
            list.concat(taicpu.op_ref_reg(compare_ins,S_NO,otherside.location.reference,regside.location.register));
          end;
        LOC_MMREGISTER,LOC_CMMREGISTER:
          list.concat(taicpu.op_reg_reg(compare_ins,S_NO,otherside.location.register,regside.location.register));
        else
          internalerror(badloc_error);
      end;

      { the operands were compared in exchanged order }
      if right_in_reg then
        toggleflag(nf_swapped);

      location.resflags:=getfpuresflags;
      location_freetemp(list,left.location);
      location_freetemp(list,right.location);
    end;
  { Generates code for an add/sub/mul/div node whose operands are vectors
    that fit into an mm register.  The result is delivered in a
    LOC_MMREGISTER; operands that do not fit into an mm register are not
    supported yet and raise an internal error.

    The operation size handed to the code generator is derived from the
    float type of the array's element type in both code paths.  The
    commutative path used to typecast the array definition itself to
    tfloatdef, which reads a floattype from an object that is not a float
    definition.
    NOTE(review): this assumes fits_in_mm_register only accepts array
    definitions with float elements, as the non-commutative path already
    did -- confirm against defutil. }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of one vector element, shared by both paths below }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { reuse right's register as destination }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end;
    end;
  { Generates code for a floating point add/sub/mul/div node.

    Types handled by the vector fpu are delegated to the AVX or SSE variant.
    Otherwise x87 code is emitted: if prepare_x87_locations returns a node
    that can be used as memory operand, the non-popping instruction with that
    memory reference is used; else the popping register form on ST/ST(1) is
    emitted and the tracked fpu stack depth is decremented.  The result is in
    ST as LOC_FPUREGISTER. }
  procedure tx86addnode.second_addfloat;
    var
      pop_ins,
      mem_ins    : TAsmOp;
      memoperand : tnode;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memoperand);

      { pick both encodings of the operation; for the non-commutative
        operations the reversed instruction is used when the operands are
        swapped }
      case nodetype of
        addn :
          begin
            pop_ins:=A_FADDP;
            mem_ins:=A_FADD;
          end;
        muln :
          begin
            pop_ins:=A_FMULP;
            mem_ins:=A_FMUL;
          end;
        subn :
          if nf_swapped in flags then
            begin
              pop_ins:=A_FSUBRP;
              mem_ins:=A_FSUBR;
            end
          else
            begin
              pop_ins:=A_FSUBP;
              mem_ins:=A_FSUB;
            end;
        slashn :
          if nf_swapped in flags then
            begin
              pop_ins:=A_FDIVRP;
              mem_ins:=A_FDIVR;
            end
          else
            begin
              pop_ins:=A_FDIVP;
              mem_ins:=A_FDIV;
            end;
        else
          internalerror(2003042214);
      end;

      if assigned(memoperand) then
        emit_ref(mem_ins,tcgsize2opsize[memoperand.location.size],memoperand.location.reference)
      else
        begin
          emit_reg_reg(pop_ins,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Compares two floating point operands and returns the outcome as
    LOC_FLAGS.

    If either operand type is handled by the vector fpu the work is delegated
    to second_cmpfloatvector.  Otherwise both operands are forced onto the
    x87 stack and compared; on non-x86_64 targets cpu types below Pentium2
    use FCOMPP and copy the fpu status word into the cpu flags, all other
    cases use FCOMIP. }
  procedure tx86addnode.second_cmpfloat;

{$ifndef x86_64}
    { FCOMPP pops both operands; afterwards the fpu condition codes are
      transferred into the cpu flags through AH/SAHF }
    procedure compare_via_status_word;
{$ifdef i8086}
      var
        tmpref: treference;
{$endif i8086}
      begin
        emit_none(A_FCOMPP,S_NO);
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
        { load fpu flags }
{$ifdef i8086}
        if current_settings.cputype < cpu_286 then
          begin
            { below the 286 the status word is stored to a 2 byte temp and
              its high byte (offset+1) is loaded into AH }
            tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
            emit_ref(A_FSTSW,S_NO,tmpref);
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            inc(tmpref.offset);
            emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
            dec(tmpref.offset);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
          end
        else
{$endif i8086}
          begin
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            emit_reg(A_FNSTSW,S_NO,NR_AX);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          end;
      end;
{$endif x86_64}

    { FCOMIP sets the cpu flags directly }
    procedure compare_via_fcomip;
      begin
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
        { fcomip pops only one fpu register }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
      end;

    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        compare_via_status_word
      else
{$endif x86_64}
        compare_via_fcomip;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1228. {*****************************************************************************
  1229. Add64bit
  1230. *****************************************************************************}
  { 64 bit addition: when cpu64bitalu is defined this is simply the ordinal
    code path; without it this generic implementation raises an internal
    error, i.e. the operation must be implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: when cpu64bitalu is defined this is simply the ordinal
    compare; without it this generic implementation raises an internal error,
    i.e. the operation must be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1249. {*****************************************************************************
  1250. AddOrdinal
  1251. *****************************************************************************}
  { Compares two ordinal operands with CMP and returns the outcome as
    LOC_FLAGS.

    A constant right operand is compared directly against a left operand in
    memory; on x86_64 this shortcut is only taken for 64 bit operands if the
    constant lies within the longint range.  In all other cases left is
    loaded into a register and the generic code emitter is used.  The
    comparison is treated as unsigned as soon as one operand type is not
    signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef       : tdef;
      cmpsize      : tcgsize;
      unsigned_cmp,
      imm_vs_mem   : boolean;
    begin
      unsigned_cmp:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      imm_vs_mem:=(right.location.loc=LOC_CONSTANT) and
                  (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      { for 64 bit operands the constant must fit into the longint range }
      if imm_vs_mem and (cmpsize in [OS_64,OS_S64]) then
        imm_vs_mem:=(right.location.value>=low(longint)) and
                    (right.location.value<=high(longint));
{$endif x86_64}

      if imm_vs_mem then
        emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference)
      else
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,unsigned_cmp,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      { left is released last on both paths }
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned_cmp);
    end;
  begin
    { unit initialization: make tx86addnode the class stored in caddnode
      (presumably the class reference used to create add nodes -- confirm
      against its declaration) }
    caddnode:=tx86addnode;
  end.