nx86add.pas 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Add node for the x86 code generators. Declares the x86 specific helper
    routines and the second_* code generation overrides on top of
    tcgaddnode. }
  25. tx86addnode = class(tcgaddnode)
  26. protected
  { selects the result flag for the comparison stored in nodetype,
    taking nf_swapped and signedness into account }
  27. function getresflags(unsigned : boolean) : tresflags;
  { makes left.location a LOC_REGISTER; may exchange left and right
    locations instead of loading when noswap is false }
  28. procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
  { optionally forces the operands into FPU registers and records their
    resulting order in nf_swapped }
  29. procedure check_left_and_right_fpureg(force_fpureg: boolean);
  { emits "op right,left" for a register, memory or constant right operand }
  30. procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
  { emits an integer operation on left/right plus an optional overflow check }
  31. procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
  { SSE based float compare and float arithmetic }
  32. procedure second_cmpfloatsse;
  33. procedure second_addfloatsse;
  { abstract: has to be provided by a descendant class }
  34. procedure second_mul;virtual;abstract;
  35. public
  { overrides of the code generation hooks inherited from tcgaddnode }
  36. procedure second_addfloat;override;
  37. procedure second_addsmallset;override;
  38. procedure second_add64bit;override;
  39. procedure second_addordinal;override;
  40. procedure second_cmpfloat;override;
  41. procedure second_cmpsmallset;override;
  42. procedure second_cmp64bit;override;
  43. procedure second_cmpordinal;override;
  { the MMX hooks are only declared when SUPPORT_MMX is defined }
  44. {$ifdef SUPPORT_MMX}
  45. procedure second_opmmxset;override;
  46. procedure second_opmmx;override;
  47. {$endif SUPPORT_MMX}
  48. procedure second_opvector;override;
  49. end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  { Emits the integer instruction "op" that combines right.location with
    left.location.register, optionally followed by an overflow check.

    op         - x86 instruction to emit
    opsize     - operand size; translated through TCGSize2Opsize for the
                 directly emitted instructions
    unsigned   - only consulted by the overflow check: F_AE is tested when
                 true, F_NO otherwise
    extra_not  - complement one of the operands before the operation
                 (which one depends on the branch taken, see below)
    mboverflow - when true and cs_check_overflow is active, a conditional
                 jump around a call to FPC_OVERFLOW is generated

    left.location is expected to be a LOC_REGISTER on entry. }
  64. procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
  65. var
  66. power : longint;
  67. hl4 : tasmlabel;
  68. r : Tregister;
  69. begin
  70. { at this point, left.location.loc should be LOC_REGISTER }
  71. if right.location.loc=LOC_REGISTER then
  72. begin
  73. { right.location is a LOC_REGISTER }
  74. { when swapped another result register }
  75. if (nodetype=subn) and (nf_swapped in flags) then
  76. begin
  { swapped subtraction: the right register receives the result, then the
    locations are exchanged so that left.location holds it again }
  77. if extra_not then
  78. emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
  79. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
  80. { newly swapped also set swapped flag }
  81. location_swap(left.location,right.location);
  82. toggleflag(nf_swapped);
  83. end
  84. else
  85. begin
  86. if extra_not then
  87. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  { for these operations the locations are exchanged first, so the former
    right register becomes the destination; nf_swapped is not touched here }
  88. if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
  89. location_swap(left.location,right.location);
  90. emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
  91. end;
  92. end
  93. else
  94. begin
  95. { right.location is not a LOC_REGISTER }
  96. if (nodetype=subn) and (nf_swapped in flags) then
  97. begin
  { swapped subtraction with right not in a register: load right into a
    scratch register, apply op with left as source and copy the result
    back into the left register }
  98. if extra_not then
  99. cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
  100. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  101. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r);
  102. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
  103. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
  104. end
  105. else
  106. begin
  107. { Optimizations when right.location is a constant value }
  { (un)equality test against constant 0: TEST reg,reg instead of CMP }
  108. if (op=A_CMP) and
  109. (nodetype in [equaln,unequaln]) and
  110. (right.location.loc=LOC_CONSTANT) and
  111. (right.location.value=0) then
  112. begin
  113. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  114. end
  115. else
  { adding constant 1: INC, but only when overflow checking is off }
  116. if (op=A_ADD) and
  117. (right.location.loc=LOC_CONSTANT) and
  118. (right.location.value=1) and
  119. not(cs_check_overflow in current_settings.localswitches) then
  120. begin
  121. emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
  122. end
  123. else
  { subtracting constant 1: DEC, but only when overflow checking is off }
  124. if (op=A_SUB) and
  125. (right.location.loc=LOC_CONSTANT) and
  126. (right.location.value=1) and
  127. not(cs_check_overflow in current_settings.localswitches) then
  128. begin
  129. emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
  130. end
  131. else
  { multiplying by a constant power of two: SHL by the exponent returned in
    "power", again only when overflow checking is off }
  132. if (op=A_IMUL) and
  133. (right.location.loc=LOC_CONSTANT) and
  134. (ispowerof2(int64(right.location.value),power)) and
  135. not(cs_check_overflow in current_settings.localswitches) then
  136. begin
  137. emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
  138. end
  139. else
  140. begin
  141. if extra_not then
  142. begin
  { NOTE(review): this path always emits A_AND and ignores "op"; the caller
    visible in this unit that passes extra_not (second_addsmallset, subn)
    uses op=A_AND - confirm no other caller relies on a different op }
  143. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  144. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r);
  145. emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
  146. emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
  147. end
  148. else
  149. begin
  150. emit_op_right_left(op,opsize);
  151. end;
  152. end;
  153. end;
  154. end;
  155. { only in case of overflow operations }
  156. { produce overflow code }
  157. { we must put it here directly, because sign of operation }
  158. { is in unsigned VAR!! }
  159. if mboverflow then
  160. begin
  161. if cs_check_overflow in current_settings.localswitches then
  162. begin
  { skip the FPC_OVERFLOW call when the flag tested below signals
    "no overflow" (F_AE for unsigned, F_NO for signed operations) }
  163. current_asmdata.getjumplabel(hl4);
  164. if unsigned then
  165. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  166. else
  167. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  168. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
  169. cg.a_label(current_asmdata.CurrAsmList,hl4);
  170. end;
  171. end;
  172. end;
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { Guarantees that left.location is a LOC_REGISTER afterwards.

      opsize - size used when left has to be loaded into a register
      noswap - when true, left and right locations are never exchanged

      If right already sits in a register and swapping is permitted, the two
      locations are exchanged and nf_swapped is toggled; otherwise left is
      loaded into a register. }
    var
      is_comparison : boolean;
    begin
      { already fine: nothing to emit }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          { cheapest solution: let the right register act as left operand }
          location_swap(left.location,right.location);
          toggleflag(nf_swapped);
        end
      else
        begin
          { a comparison does not modify its operand, so a constant
            register may be reused in that case }
          is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
          location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,is_comparison);
        end;
    end;
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    { Brings the operands of a float operation onto the FPU stack (only when
      force_fpureg is set) and toggles nf_swapped whenever the operands end
      up on the stack in reversed order. }
    begin
      if right.location.loc=LOC_FPUREGISTER then
        begin
          if left.location.loc=LOC_FPUREGISTER then
            { fpu operands are always in the wrong order on the stack }
            toggleflag(nf_swapped)
          else if force_fpureg then
            { only left is missing: load it on top of right }
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end
      else if force_fpureg then
        begin
          { right has to be loaded first }
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left.location.loc=LOC_FPUREGISTER then
            { left was on the stack => swap }
            toggleflag(nf_swapped)
          else
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end;
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    { Emits "op right,left" with left.location.register as destination.
      right may be a register, a memory reference or a constant; subset
      locations are loaded into a register first. Any other right location
      triggers internalerror 200203232. }
    var
      list : TAsmList;
      opsz : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      list:=current_asmdata.CurrAsmList;
      opsz:=TCGSize2Opsize[opsize];

      { subset locations cannot be used as an instruction operand }
      if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        location_force_reg(list,right.location,def_cgsize(right.resultdef),true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          list.concat(taicpu.op_reg_reg(op,opsz,right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(list,right.location.reference);
            list.concat(taicpu.op_ref_reg(op,opsz,right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly:
              larger 64 bit values go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(list,opsize);
                cg.a_load_const_reg(list,opsize,right.location.value,constreg);
                list.concat(taicpu.op_reg_reg(op,opsz,constreg,left.location.register));
              end
            else
{$endif x86_64}
              list.concat(taicpu.op_const_reg(op,opsz,right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    { Returns the flag that represents the comparison stored in nodetype.

      unsigned - true selects the above/below flags, false the
                 greater/less flags

      When nf_swapped is set the operands were exchanged, so the mirrored
      relation is used. The result is left unassigned for node types other
      than the six comparison nodes. }
    var
      relation : tnodetype;
    begin
      { mirror the relation if the operands have been exchanged }
      relation:=nodetype;
      if nf_swapped in flags then
        case relation of
          ltn :
            relation:=gtn;
          lten :
            relation:=gten;
          gtn :
            relation:=ltn;
          gten :
            relation:=lten;
        end;

      case relation of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            getresflags:=F_B
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            getresflags:=F_BE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            getresflags:=F_A
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            getresflags:=F_AE
          else
            getresflags:=F_GE;
      end;
    end;
  300. {*****************************************************************************
  301. AddSmallSet
  302. *****************************************************************************}
  { Generates code for an operation on small sets, i.e. sets handled as an
    integer of resultdef.size bytes:
      addn          - OR, or BTS when a single set element is added
      symdifn, xorn - XOR
      muln, andn    - AND
      subn          - AND with the complemented subtrahend
      orn           - OR
    Any other node type raises internalerror 2003042215. The result register
    is stored in location. }
  303. procedure tx86addnode.second_addsmallset;
  304. var
  305. opsize : TCGSize;
  306. op : TAsmOp;
  307. extra_not,
  308. noswap : boolean;
  309. all_member_optimization:boolean;
  310. begin
  311. pass_left_right;
  312. noswap:=false;
  313. extra_not:=false;
  314. all_member_optimization:=false;
  315. opsize:=int_cgsize(resultdef.size);
  316. case nodetype of
  317. addn :
  318. begin
  319. { adding elements is not commutative }
  320. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  321. swapleftright;
  322. { are we adding set elements ? }
  323. if right.nodetype=setelementn then
  324. begin
  325. { no range support for smallsets! }
  326. if assigned(tsetelementnode(right).right) then
  327. internalerror(43244);
  328. { btsb isn't supported }
  329. if opsize=OS_8 then
  330. opsize:=OS_32;
  331. { bts requires both elements to be registers }
  332. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
  333. location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
  334. op:=A_BTS;
  { the operand roles of BTS are fixed, so left_must_be_reg may not swap }
  335. noswap:=true;
  336. end
  337. else
  338. op:=A_OR;
  339. end;
  340. symdifn :
  341. op:=A_XOR;
  342. muln :
  343. op:=A_AND;
  344. subn :
  345. begin
  346. op:=A_AND;
  { minuend is the constant -1 (presumably the set with all members - TODO
    confirm): the difference is then just the complement of the other
    operand, handled separately below }
  347. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  348. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  349. all_member_optimization:=true;
  { a constant subtrahend is complemented at compile time; otherwise
    emit_generic_code is asked to emit the NOT (extra_not) }
  350. if (not(nf_swapped in flags)) and
  351. (right.location.loc=LOC_CONSTANT) then
  352. right.location.value := not(right.location.value)
  353. else if (nf_swapped in flags) and
  354. (left.location.loc=LOC_CONSTANT) then
  355. left.location.value := not(left.location.value)
  356. else
  357. extra_not:=true;
  358. end;
  359. xorn :
  360. op:=A_XOR;
  361. orn :
  362. op:=A_OR;
  363. andn :
  364. op:=A_AND;
  365. else
  366. internalerror(2003042215);
  367. end;
  368. if all_member_optimization then
  369. begin
  370. { right.location is a LOC_REGISTER }
  371. { when swapped another result register }
  372. if nf_swapped in flags then
  373. begin
  374. { newly swapped also set swapped flag }
  375. location_swap(left.location,right.location);
  376. toggleflag(nf_swapped);
  377. end;
  { after the normalisation above the subtrahend is in right.location:
    load it into a register, complement it and use it as result }
  378. location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
  379. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  380. location:=right.location;
  381. end
  382. else
  383. begin
  384. { left must be a register }
  385. left_must_be_reg(opsize,noswap);
  { sets are treated as unsigned and no overflow check is requested }
  386. emit_generic_code(op,opsize,true,extra_not,false);
  387. location_freetemp(current_asmdata.CurrAsmList,right.location);
  388. { left is always a register and contains the result }
  389. location:=left.location;
  390. end;
  391. { fix the changed opsize we did above because of the missing btsb }
  392. if opsize<>int_cgsize(resultdef.size) then
  393. location_force_reg(current_asmdata.CurrAsmList,location,int_cgsize(resultdef.size),false);
  394. end;
  procedure tx86addnode.second_cmpsmallset;
    { Generates the comparison of two small sets and leaves the result in
      the flags (location becomes LOC_FLAGS).

      equaln/unequaln compare the operands directly. lten/gten first AND the
      two operands and then compare the intersection against the right
      operand for equality. Other node types raise internalerror
      2003042215. }
    var
      cmpsize : TCGSize;
      swapped : boolean;
    begin
      pass_left_right;
      cmpsize:=int_cgsize(left.resultdef.size);

      case nodetype of
        equaln,
        unequaln :
          { a plain CMP, emitted below, is all that is needed }
          ;
        lten,
        gten :
          begin
            swapped:=nf_swapped in flags;
            if (swapped and (nodetype=gten)) or
               (not(swapped) and (nodetype=lten)) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,cmpsize,true);
            emit_op_right_left(A_AND,cmpsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(cmpsize,false);
      { every path reaching this point compares with CMP }
      emit_generic_code(A_CMP,cmpsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  428. {*****************************************************************************
  429. AddMMX
  430. *****************************************************************************}
  431. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    { Generates code for an MMX vector operation (addn, muln, subn, xorn,
      orn, andn) on the operands left and right. The instruction is picked
      from the MMX base type of left.resultdef and, for addn/subn, from the
      cs_mmx_saturation switch. The result register is stored in
      location.register (LOC_MMXREGISTER). Other node types raise
      internalerror 2003042214.

      NOTE(review): the inner "case mmxbase" statements have no else branch,
      so "op" stays unassigned for base types they do not list (e.g.
      saturated 32 bit add/sub, 8 bit multiply) - confirm such combinations
      are rejected before this method is reached. }
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized saturating subtraction; mirrors A_PADDSW
                      in the addn case (the byte form PSUBSB was wrong) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { reuse the register of right as left operand }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: right is copied into a fresh register
                which then serves as destination of the operation }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that gets loaded from memory below, so right
                    is the location to validate (left is always a
                    LOC_MMXREGISTER here and would fail this check) }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in the right register: exchange the locations
                so that left.location describes the result again }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  597. {$endif SUPPORT_MMX}
  598. {*****************************************************************************
  599. addmmxset
  600. *****************************************************************************}
  601. {$ifdef SUPPORT_MMX}
  { Code generation hook for set operations carried out with MMX.
    Only the instruction selection and the operand preparation are present
    in this body: the call that would emit the selected instruction is
    commented out near the end, so "op" is computed but not consumed here.
    NOTE(review): looks unfinished - confirm whether this hook is ever
    reached. }
  602. procedure tx86addnode.second_opmmxset;
  603. var opsize : TCGSize;
  604. op : TAsmOp;
  605. cmpop,
  606. noswap : boolean;
  607. begin
  608. pass_left_right;
  609. cmpop:=false;
  610. noswap:=false;
  611. opsize:=OS_32;
  612. case nodetype of
  613. addn:
  614. begin
  615. { are we adding set elements ? }
  616. if right.nodetype=setelementn then
  617. begin
  { the handling of single elements is disabled: "op" stays unassigned
    on this path }
  618. { adding elements is not commutative }
  619. { if nf_swapped in flags then
  620. swapleftright;}
  621. { bts requires both elements to be registers }
  622. { location_force_reg(current_asmdata.CurrAsmList,left.location,opsize_2_cgsize[opsize],false);
  623. location_force_reg(current_asmdata.CurrAsmList,right.location,opsize_2_cgsize[opsize],true);
  624. op:=A_BTS;
  625. noswap:=true;}
  626. end
  627. else
  628. op:=A_POR;
  629. end;
  630. symdifn :
  631. op:=A_PXOR;
  632. muln:
  633. op:=A_PAND;
  634. subn:
  635. op:=A_PANDN;
  636. equaln,
  637. unequaln :
  638. begin
  639. op:=A_PCMPEQD;
  640. cmpop:=true;
  641. end;
  642. lten,gten:
  643. begin
  { same operand normalisation as in second_cmpsmallset, followed by an
    integer AND of size OS_32 emitted through emit_op_right_left }
  644. if (not(nf_swapped in flags) and (nodetype = lten)) or
  645. ((nf_swapped in flags) and (nodetype = gten)) then
  646. swapleftright;
  647. location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
  648. emit_op_right_left(A_AND,opsize);
  649. op:=A_PCMPEQD;
  650. cmpop:=true;
  651. { warning: ugly hack, we need a JE so change the node to equaln }
  652. nodetype:=equaln;
  653. end;
  654. xorn :
  655. op:=A_PXOR;
  656. orn :
  657. op:=A_POR;
  658. andn :
  659. op:=A_PAND;
  660. else
  661. internalerror(2003042215);
  662. end;
  663. { left must be a register }
  664. left_must_be_reg(opsize,noswap);
  665. { emit_generic_code(op,opsize,true,extra_not,false);}
  666. location_freetemp(current_asmdata.CurrAsmList,right.location);
  { for comparisons the left operand is released as well }
  667. if cmpop then
  668. location_freetemp(current_asmdata.CurrAsmList,left.location);
  669. end;
  670. {$endif SUPPORT_MMX}
  671. {*****************************************************************************
  672. AddFloat
  673. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    { Generates scalar SSE code for addn, muln, subn and slashn on float
      operands. The result is placed in an mm register that is stored in
      location.register; other node types raise internalerror 200312231. }
    var
      cgop : topcg;
      list : TAsmList;
      srcnode : tnode;
    begin
      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      list:=current_asmdata.CurrAsmList;
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      { pick the register that receives the result and the node that
        supplies the other operand }
      if (cgop in [OP_ADD,OP_MUL]) and (right.location.loc=LOC_MMREGISTER) then
        begin
          { the register of right may only act as destination if the
            operation is commutative }
          location.register:=right.location.register;
          srcnode:=left;
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;
          location_force_mmregscalar(list,left.location,false);
          location.register:=left.location.register;
          srcnode:=right;
        end;

      { a source living on the fpu stack is written to memory: it is not
        forced into an mm register because there is no direct fpu->mm
        register copy instruction, so going through memory probably gives
        shorter code }
      if srcnode.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        location_force_mem(list,srcnode.location);

      cg.a_opmm_loc_reg(list,cgop,location.size,srcnode.location,location.register,mms_movescalar);
    end;
  procedure tx86addnode.second_cmpfloatsse;
    { Generates an SSE float comparison with COMISS (single) or COMISD
      (double); any other type of left raises internalerror 200402222.
      The result is returned in the flags (location becomes LOC_FLAGS) and
      is evaluated with the unsigned condition codes. }
    var
      comis : tasmop;
      list : TAsmList;
    begin
      if is_single(left.resultdef) then
        comis:=A_COMISS
      else if is_double(left.resultdef) then
        comis:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;

      list:=current_asmdata.CurrAsmList;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left is the register operand of the compare }
          location_force_mmregscalar(list,left.location,false);
          { an fpu operand is written to memory rather than moved to an mm
            register, as there is no direct fpu->mm register copy
            instruction }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(list,right.location);
          case right.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(list,right.location.reference);
                list.concat(taicpu.op_ref_reg(comis,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              list.concat(taicpu.op_reg_reg(comis,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right already is in an mm register: compare the other way
            round and remember the exchange in nf_swapped }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(list,left.location);
          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(list,left.location.reference);
                list.concat(taicpu.op_ref_reg(comis,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              list.concat(taicpu.op_reg_reg(comis,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  procedure tx86addnode.second_opvector;
    { Generates code for addn, muln, subn and slashn on vector operands
      that fit into an mm register. The result register is stored in
      location.register. Other node types raise internalerror 200610071,
      operands that do not fit into an mm register internalerror
      200610072. }
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The element size comes from the float type of the array
            elements. It used to be derived from left.resultdef itself
            (cast to tfloatdef) in the commutative branch only, which
            disagreed with the other branch.
            NOTE(review): assumes left.resultdef is an array of floats
            whenever fits_in_mm_register accepts it - confirm. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Emits code for a floating point add/sub/mul/div node.

    When use_sse(resultdef) holds, the work is delegated to
    second_addfloatsse.  Otherwise the popping x87 instruction matching the
    node type is emitted on ST/ST1 and the result location is set to the
    FPU register NR_ST.

    Raises internalerror(2003042214) for an unsupported node type. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resultdef) then
        second_addfloatsse
      else
        begin
          pass_left_right;

          { pick the popping x87 instruction for this node type }
          case nodetype of
            addn :
              fpuop:=A_FADDP;
            muln :
              fpuop:=A_FMULP;
            subn :
              fpuop:=A_FSUBP;
            slashn :
              fpuop:=A_FDIVP;
            else
              internalerror(2003042214);
          end;

          { NOTE(review): nf_swapped is tested only after this call, so the
            call presumably may change the flag - confirm against its
            definition before reordering anything here }
          check_left_and_right_fpureg(true);

          { if the tree nodes were swapped, the non-commutative operations
            need their reverse form }
          if nf_swapped in flags then
            begin
              if (nodetype=subn) then
                fpuop:=A_FSUBRP
              else if (nodetype=slashn) then
                fpuop:=A_FDIVRP;
            end;

          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          { the popping instruction removed one entry from the fpu stack }
          tcgx86(cg).dec_fpu_stack;

          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=NR_ST;
        end;
    end;
  { Emits code for a floating point comparison node and leaves the result
    in the cpu flags (location.loc = LOC_FLAGS).

    When use_sse holds for either operand type, the work is delegated to
    second_cmpfloatsse.  Otherwise an x87 compare is emitted:
      - on non-x86_64 builds with cputype below cpu_Pentium2:
        FCOMPP, then FNSTSW AX / SAHF to move the fpu status into the flags;
      - in all other cases: FCOMIP followed by FSTP ST0 to pop the second
        operand.
    Both sequences remove two entries from the fpu stack.

    The condition stored in location.resflags depends on the node type and
    on nf_swapped (for a swapped compare, "below" and "above" exchange
    roles).  Raises internalerror(2024061801) for a node type that is not a
    comparison, instead of leaving the result flags undefined. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
      swapped : boolean;
    begin
      if use_sse(left.resultdef) or use_sse(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          { fcompp pops both operands }
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Both instruction sequences use the same flag conditions, so the
        mapping is done once.  It is read after check_left_and_right_fpureg,
        exactly where the original per-branch mappings tested nf_swapped. }
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          resflags:=F_E;
        unequaln :
          resflags:=F_NE;
        ltn :
          if swapped then
            resflags:=F_A
          else
            resflags:=F_B;
        lten :
          if swapped then
            resflags:=F_AE
          else
            resflags:=F_BE;
        gtn :
          if swapped then
            resflags:=F_B
          else
            resflags:=F_A;
        gten :
          if swapped then
            resflags:=F_BE
          else
            resflags:=F_AE;
        else
          { not a comparison node: resflags would be uninitialised }
          internalerror(2024061801);
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  950. {*****************************************************************************
  951. Add64bit
  952. *****************************************************************************}
  { 64 bit addition node.

    When the compiler is built with cpu64bit defined, 64 bit values are
    handled by second_addordinal.  Without cpu64bit this generic version
    raises internalerror(200402042); a separate implementation is required
    in that case. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separately }
      internalerror(200402042);
{$else}
      second_addordinal;
{$endif}
    end;
  { 64 bit comparison node.

    When the compiler is built with cpu64bit defined, 64 bit values are
    handled by second_cmpordinal.  Without cpu64bit this generic version
    raises internalerror(200402043); a separate implementation is required
    in that case. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separately }
      internalerror(200402043);
{$else}
      second_cmpordinal;
{$endif}
    end;
  971. {*****************************************************************************
  972. AddOrdinal
  973. *****************************************************************************}
  { Ordinal arithmetic node.

    A multiplication where at least one operand type is not signed is routed
    to second_mul, because the unsigned MUL opcode requires special
    handling.  Every other case is left to the inherited implementation. }
  procedure tx86addnode.second_addordinal;
    begin
      { anything that is not a multiplication, and any multiplication with
        two signed operands, goes through the generic code }
      if (nodetype<>muln) or
         (is_signed(left.resultdef) and
          is_signed(right.resultdef)) then
        inherited second_addordinal
      else
        { unsigned MUL opcode }
        second_mul;
    end;
  { Ordinal comparison node.

    Emits a CMP of the two operands using the size of the left operand's
    type and leaves the result in the cpu flags (LOC_FLAGS).  The comparison
    is treated as unsigned as soon as one of the operand types is not
    signed; this selects the flag condition returned by getresflags. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      { unsigned unless both operand types are signed }
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));
      cmpsize:=def_cgsize(left.resultdef);

      pass_left_right;
      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { only the flags are needed from here on }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { Unit initialization: assigns the class tx86addnode to caddnode.
    NOTE(review): caddnode is presumably the class reference the node
    factory uses to create add nodes, so that this x86 implementation is
    instantiated - confirm against its declaration. }
  1002. begin
  1003. caddnode:=tx86addnode;
  1004. end.