nx86add.pas 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add-node code generator shared by the i386 and x86 targets.
      The protected section holds helpers used by the second_* generators;
      the public section overrides the generator entry points inherited
      from tcgaddnode. }
    tx86addnode = class(tcgaddnode)
    protected
      { selection of the result flags for integer and floating point compares }
      function  getresflags(unsigned : boolean) : tresflags;
      function  getfpuresflags : tresflags;

      { operand preparation }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      procedure force_left_and_right_fpureg;
      procedure prepare_x87_locations(out refnode: tnode);

      { instruction emission }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

      { floating point / vector helpers }
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      function  use_fma : boolean;override;
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  54. implementation
  55. uses
  56. globtype,globals,systems,
  57. verbose,cutils,
  58. cpuinfo,
  59. aasmbase,aasmtai,aasmdata,aasmcpu,
  60. symconst,symdef,
  61. cgobj,hlcgobj,cgx86,cga,cgutils,
  62. paramgr,tgobj,ncgutil,
  63. ncon,nset,ninl,
  64. defutil;
  65. {*****************************************************************************
  66. Helpers
  67. *****************************************************************************}
  { Emits the integer instruction for a binary node whose left operand is
    already in a register; the result ends up in left.location.register
    (the locations are swapped where necessary to make that true).

      op         - x86 opcode to emit
      opsize     - operand size of the operation
      unsigned   - only used by the overflow check: the carry flag (F_AE)
                   is tested for unsigned, the overflow flag (F_NO) for
                   signed operations
      extra_not  - complement one operand with A_NOT before combining; in the
                   generic non-register path the combination is then always
                   emitted as A_AND
      mboverflow - the operation may overflow; a call to FPC_OVERFLOW is
                   emitted when cs_check_overflow is active }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4   : tasmlabel;
      r     : Tregister;
      href  : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute right-left in a scratch register, then move the
                result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  if (target_info.cpu=cpu_i386) then
                    emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
                  else
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else if (op=A_ADD) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not(cs_check_overflow in current_settings.localswitches) and
                 { same guard as the DEC case below: only use INC when the
                   code generator allows INC/DEC at all }
                 UseIncDec then
                begin
                  emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_SUB) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not(cs_check_overflow in current_settings.localswitches) and
                 UseIncDec then
                begin
                  emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (ispowerof2(int64(right.location.value),power)) and
                 not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 2^power -> shift left }
                  emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 3, 5 or 9 -> lea (reg,reg,2/4/8),newreg }
                  reference_reset_base(href,left.location.register,0,0);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { load right into a scratch register so it can be
                        complemented without modifying the original location }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the FPC_OVERFLOW call when no overflow occurred }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Ensures that left.location is a LOC_REGISTER, either by swapping the
    operand locations (only when noswap is false and right already is a
    register) or by loading left into a register of type opdef.  Afterwards
    any non-constant operand whose size differs from opsize (ignoring
    signedness) is forced into a register of type opdef as well. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not modify its operand, so a constant
                register may be reused in that case }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
            end
          else
            begin
              { right already lives in a register: exchange the locations
                and remember that we did so }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { bring operands of a different size to the operation size;
        right first, then left }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers.  Right is loaded first when it
    is not yet a LOC_FPUREGISTER, then left.  When left already was a
    LOC_FPUREGISTER nothing is loaded for it and nf_swapped is toggled
    instead, because the operands then sit on the FPU stack in the opposite
    order. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already => operands are in the wrong order }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Makes both sides suitable for executing an x87 instruction:
    if either side is an OS_F32/OS_F64-sized LOC_REFERENCE, that node is
    returned in 'refnode'; everything else is loaded onto the FPU stack.
    'refnode' is nil when both operands ended up on the stack. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      fpu_operands : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory here;
        below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already live on the FPU stack }
      fpu_operands:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);

      case fpu_operands of
        0:
          begin
            { right always goes onto the stack; left may stay in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable directly as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { not usable as memory operand => load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
      else
        InternalError(2013090802);
      end;
    end;
  { Emits "op right,left" with left.location.register as destination.
    Right may be a register, a memory reference or a constant; subset
    locations are first loaded into a register.  Any other right location
    raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      { bit-packed operands cannot be used directly }
      case right.location.loc of
        LOC_SUBSETREG,
        LOC_CSUBSETREG,
        LOC_SUBSETREF,
        LOC_CSUBSETREF :
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
        else
          ;
      end;

      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly }
            if (opsize in [OS_64,OS_S64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flag that represents the result of the integer
    comparison described by nodetype.  (Un)equality maps to F_E/F_NE; for
    the ordering comparisons the signed (L/LE/G/GE) or unsigned (B/BE/A/AE)
    flags are selected, mirrored when nf_swapped is set. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      operands_swapped : boolean;
    begin
      operands_swapped:=nf_swapped in flags;
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else if unsigned then
        begin
          if operands_swapped then
            begin
              case nodetype of
                ltn  : result:=F_A;
                lten : result:=F_AE;
                gtn  : result:=F_B;
                gten : result:=F_BE;
                else
                  internalerror(2013120107);
              end;
            end
          else
            begin
              case nodetype of
                ltn  : result:=F_B;
                lten : result:=F_BE;
                gtn  : result:=F_A;
                gten : result:=F_AE;
                else
                  internalerror(2013120108);
              end;
            end;
        end
      else
        begin
          if operands_swapped then
            begin
              case nodetype of
                ltn  : result:=F_G;
                lten : result:=F_GE;
                gtn  : result:=F_L;
                gten : result:=F_LE;
                else
                  internalerror(2013120105);
              end;
            end
          else
            begin
              case nodetype of
                ltn  : result:=F_L;
                lten : result:=F_LE;
                gtn  : result:=F_G;
                gten : result:=F_GE;
                else
                  internalerror(2013120106);
              end;
            end;
        end;
    end;
  { Returns the floating point condition flag (F_F*) that represents the
    result of the comparison described by nodetype, mirrored for the
    ordering comparisons when nf_swapped is set. }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          begin
            if not(nf_swapped in flags) then
              begin
                case nodetype of
                  ltn  : result:=F_FB;
                  lten : result:=F_FBE;
                  gtn  : result:=F_FA;
                  gten : result:=F_FAE;
                  else
                    internalerror(2014031403);
                end;
              end
            else
              begin
                case nodetype of
                  ltn  : result:=F_FA;
                  lten : result:=F_FAE;
                  gtn  : result:=F_FB;
                  gten : result:=F_FBE;
                  else
                    internalerror(2014031402);
                end;
              end;
          end;
      end;
    end;
  413. {*****************************************************************************
  414. AddSmallSet
  415. *****************************************************************************}
  {$ifndef i8086}
  { Generates code for operations on small sets: union (including adding a
    single element via BTS), symmetric difference, intersection and
    difference.  The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase    : aint;
      opdef      : tdef;
      opsize     : TCGSize;
      op         : TAsmOp;
      swapped,
      complement,
      keep_order,
      plain_not  : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      complement:=false;
      plain_not:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        setbase:=tsetdef(right.resultdef).setbase
      else
        setbase:=tsetdef(left.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { we are adding a single set element }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and (not b) }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { the minuend is in right, the subtrahend in left }
                plain_not:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  complement:=true;
              end
            else
              begin
                plain_not:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  complement:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not plain_not then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,keep_order);
          emit_generic_code(op,opsize,true,complement,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  {$endif not i8086}
  { Generates code for comparing two small sets.  (Un)equality is a plain
    CMP.  The subset tests (<=, >=) are emitted as "left and right"
    followed by a CMP against right, with the result read as an equality
    test.  The result location is LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef        : tdef;
      opsize       : TCGSize;
      must_reorder : boolean;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          { nothing to prepare, the CMP is emitted below }
          ;
        lten,
        gten :
          begin
            if nf_swapped in flags then
              must_reorder:=(nodetype=gten)
            else
              must_reorder:=(nodetype=lten);
            if must_reorder then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  555. {*****************************************************************************
  556. AddMMX
  557. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for MMX add, multiply, subtract, xor, or and and nodes.
    The opcode is selected from the node type, the MMX element type of the
    left operand and, for add/sub, the cs_mmx_saturation switch.  The
    result is placed in an MMX register.

    NOTE(review): the inner "case mmxbase" statements have no else branch,
    so op stays unassigned for element types that are not listed (e.g.
    saturated add on 32 bit elements) - confirm that earlier passes reject
    those combinations. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      cmpop     : boolean;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { cmpop is never set to true in this routine }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word-sized signed saturation, the counterpart of
                      A_PADDSW above (was the byte variant A_PSUBSB) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it; the fresh register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is about to be used as a memory operand, so it is
                    right (not left, which is an MMX register by now) that
                    has to be a reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  {$endif SUPPORT_MMX}
  728. {*****************************************************************************
  729. AddFloat
  730. *****************************************************************************}
  { Generates SSE code for a scalar floating point addn/subn/muln/slashn node.
    The result location is set up as LOC_MMREGISTER.  When the selected fpu
    type is at least SSE3, sqr(a)+sqr(b) and sqr(a)-sqr(b) are computed with a
    packed multiply followed by a horizontal add/sub. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
      sqr_sum,
      both_on_fpustack : boolean;
      sqr_arg : tnode;
    begin
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { drop both sqr() nodes and continue with their arguments only }
          sqr_arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=sqr_arg;

          sqr_arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=sqr_arg;
        end;

      pass_left_right;

      both_on_fpustack:=(left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpustack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange manually }
          if both_on_fpustack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if not is_double(resultdef) then
            begin
              { interleave left and right in the lower 64 bits }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the operands are now in bits 0..31 (left) and 32..63 (right),
                mirrored in bits 64..127 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register))
              else
                internalerror(201108163);
            end
          else
            begin
              { left goes to the low half, right to the high half }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register))
              else
                internalerror(201108162);
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { An operand living in an fpu register is written to memory and not
            forced into an mm register: there is no direct fpu->mm register
            copy instruction, so going through memory probably allows shorter
            code.  The same reasoning applies to the other branch. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { fpu register operands go through memory because there is no
            direct fpu->mm register copy instruction }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates AVX (three operand) code for a scalar floating point
    addn/subn/muln/slashn node; the result location is LOC_MMREGISTER.
    A multiplication by the real constant 2 is emitted as an addition of the
    other operand to itself.  The sqr(a)+-sqr(b) special case is only compiled
    in when "dummy" is defined, so sqr_sum normally stays false. }
  procedure tx86addnode.second_addfloatavx;
    var
      op : topcg;
      sqr_sum,
      both_on_fpustack : boolean;
      tmp : tnode;
    begin
      sqr_sum:=false;
{$ifdef dummy}
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { drop both sqr() nodes and continue with their arguments only }
          tmp:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=tmp;

          tmp:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=tmp;
        end;
{$endif dummy}

      pass_left_right;

      both_on_fpustack:=(left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpustack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange manually }
          if both_on_fpustack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if not is_double(resultdef) then
            begin
              { interleave left and right in the lower 64 bits }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the operands are now in bits 0..31 (left) and 32..63 (right),
                mirrored in bits 64..127 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register))
              else
                internalerror(201108163);
            end
          else
            begin
              { left goes to the low half, right to the high half }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register))
              else
                internalerror(201108162);
            end;
        end
      { left*2 ?  -> left+left }
      else if (nodetype=muln) and is_constrealnode(right) and
        is_number_float(trealconstnode(right).value_real) and
        (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
        end
      { right*2 ?  -> right+right }
      else if (nodetype=muln) and is_constrealnode(left) and
        is_number_float(trealconstnode(left).value_real) and
        (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { An operand living in an fpu register is written to memory and not
            forced into an mm register: there is no direct fpu->mm register
            copy instruction, so going through memory probably allows shorter
            code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { fpu register operands go through memory because there is no
            direct fpu->mm register copy instruction }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  { Returns whether fused multiply-add may be used for this node.
    On i8086 the decision is delegated to the inherited implementation. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      { fma only makes sense if the result stays in an xmm register;
        fiddling with fpu registers and fma makes no sense }
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=(cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[];
{$endif i8086}
    end;
  { Emits a (V)COMISS/(V)COMISD comparison of left and right and sets the
    node location to LOC_FLAGS.  If right already lives in an mm register the
    operands are compared the other way round and nf_swapped is toggled. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { indexed by UseAVX }
      cmp_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      cmp_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
    var
      op : tasmop;
    begin
      if is_single(left.resultdef) then
        op:=cmp_single[UseAVX]
      else if is_double(left.resultdef) then
        op:=cmp_double[UseAVX]
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if not (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
        begin
          { left becomes the register operand, right is used as it is }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end
      else
        begin
          { right is the register operand, left is used as it is }
          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);
          { the operands were compared in reversed order }
          toggleflag(nf_swapped);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for an addn/subn/muln/slashn node whose operands fit into
    a single mm register; the result location is LOC_MMREGISTER.
    Operands that do not fit raise internalerror(200610072), any other node
    type raises internalerror(200610071). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operand def is treated as an array def whose element def is a
            float def.  Previously the commutative branch cast left.resultdef
            itself to tfloatdef, which disagreed with the other branch and
            read floattype from a def of the wrong class.
            NOTE(review): assumes fits_in_mm_register only accepts vector
            array defs - confirm against its implementation. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point addn/subn/muln/slashn node.
    Types handled by the vector fpu are passed on to the SSE/AVX variants;
    everything else is computed on the x87 stack with the result in ST. }
  procedure tx86addnode.second_addfloat;
    const
      { index false: form used with ST/ST1, true: form used with a memory operand }
      ops_add: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      ops_mul: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      ops_sub: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      ops_div: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      op : TAsmOp;
      refnode : tnode;
      hasref,
      reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(refnode);
      { refnode, if assigned, is the operand that is used as memory operand }
      hasref:=assigned(refnode);
      { swapped operands need the reversed form of the non-commutative operations }
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          op:=ops_add[hasref];
        muln :
          op:=ops_mul[hasref];
        subn :
          begin
            if reversed then
              op:=ops_rsub[hasref]
            else
              op:=ops_sub[hasref];
          end;
        slashn :
          begin
            if reversed then
              op:=ops_rdiv[hasref]
            else
              op:=ops_div[hasref];
          end;
        else
          internalerror(2003042214);
      end;

      if not hasref then
        begin
          emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
          { account for the fpu stack entry released by the instruction above }
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison; the result location is
    LOC_FLAGS.  Types handled by the vector fpu are passed on to
    second_cmpfloatvector.  Otherwise both operands are forced onto the x87
    stack and compared with FCOMIP or, on non-x86_64 targets with a cpu type
    below Pentium 2, with FCOMPP followed by a transfer of the fpu status
    word into the cpu flags. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      swtemp: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype>=cpu_Pentium2 then
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end
{$ifndef x86_64}
      else
        begin
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word to a temp and load its upper byte
                into AH through memory }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,swtemp);
              emit_ref(A_FSTSW,S_NO,swtemp);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(swtemp.offset);
              emit_ref_reg(A_MOV,S_B,swtemp,NR_AH);
              dec(swtemp.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,swtemp);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
{$endif x86_64}
      ;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1221. {*****************************************************************************
  1222. Add64bit
  1223. *****************************************************************************}
  { 64 bit arithmetic: handled by second_addordinal when the cpu has a
    64 bit alu; otherwise this method raises internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: handled by second_cmpordinal when the cpu has a
    64 bit alu; otherwise this method raises internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1242. {*****************************************************************************
  1243. AddOrdinal
  1244. *****************************************************************************}
  { Generates a CMP for an ordinal comparison; the result location is
    LOC_FLAGS.  A constant right operand is compared directly against a
    left operand in memory; on x86_64 this is only done for 64 bit operands
    if the constant lies in the longint range.  In all other cases left is
    brought into a register first. }
  procedure tx86addnode.second_cmpordinal;
    var
      opdef : tdef;
      opsize : tcgsize;
      unsigned,
      cmp_const_with_mem : boolean;
    begin
      { the comparison is unsigned as soon as one operand is unsigned }
      unsigned:=not(is_signed(left.resultdef)) or
        not(is_signed(right.resultdef));
      opdef:=left.resultdef;
      opsize:=def_cgsize(opdef);

      pass_left_right;

      cmp_const_with_mem:=(right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      { for 64 bit operands the constant has to lie in the longint range }
      if cmp_const_with_mem and (opsize in [OS_64,OS_S64]) then
        cmp_const_with_mem:=(right.location.value>=low(longint)) and
          (right.location.value<=high(longint));
{$endif x86_64}

      if not cmp_const_with_mem then
        begin
          left_must_be_reg(opdef,opsize,false);
          emit_generic_code(A_CMP,opsize,unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  { unit initialization: store tx86addnode in caddnode
    (caddnode is declared outside this unit; presumably it is the class
    reference used to create add nodes - verify against its declaration) }
  begin
    caddnode:=tx86addnode;
  end.