nx86add.pas 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  type
    { Add/compare node shared by the i386 and x86-64 code generators.
      second_mul is abstract and has to be supplied by a descendant. }
    tx86addnode = class(tcgaddnode)
    protected
      { translates the comparison stored in nodetype (taking nf_swapped
        into account) into a flags condition }
      function  getresflags(unsigned : boolean) : tresflags;
      { makes sure left.location is a register, swapping the operand
        locations instead when that is allowed and right already is one }
      procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
      { inspects/loads the operands for x87 use and updates nf_swapped }
      procedure check_left_and_right_fpureg(force_fpureg: boolean);
      { emits "op right,left" for a register, reference or constant right }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { emits the integer operation plus the optional overflow check }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      procedure second_cmpfloatsse;
      procedure second_addfloatsse;
      procedure second_mul;virtual;abstract;
    public
      procedure second_addfloat;override;
      procedure second_addsmallset;override;
      procedure second_add64bit;override;
      procedure second_addordinal;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  49. implementation
  50. uses
  51. globtype,globals,
  52. verbose,cutils,
  53. cpuinfo,
  54. aasmbase,aasmtai,aasmdata,aasmcpu,
  55. symconst,symdef,
  56. cgobj,cgx86,cga,cgutils,
  57. paramgr,tgobj,ncgutil,
  58. ncon,nset,
  59. defutil;
  60. {*****************************************************************************
  61. Helpers
  62. *****************************************************************************}
  { Emits the instruction(s) for an integer/set operation.

    op          x86 opcode to emit
    opsize      operand size used for all emitted instructions
    unsigned    selects the carry (true) or overflow (false) flag for the
                overflow check
    extra_not   complement the subtrahend first (used for set difference)
    mboverflow  the operation may overflow; the check is only emitted when
                overflow checking is enabled in the local switches

    The caller has to make sure that left.location is a LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount     : longint;
      lab_nooverflow : tasmlabel;
      scratch        : Tregister;
      asmsize        : topsize;
      skipcond       : tresflags;
      reversed_sub,
      overflowchecks,
      emitted        : boolean;
    begin
      asmsize:=TCGSize2Opsize[opsize];
      { a subtraction whose operands have been exchanged }
      reversed_sub:=(nodetype=subn) and (nf_swapped in flags);
      overflowchecks:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              { the result ends up in right's register ... }
              if extra_not then
                emit_reg(A_NOT,asmsize,left.location.register);
              emit_reg_reg(op,asmsize,left.location.register,right.location.register);
              { ... so exchange the locations again and record that }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,asmsize,right.location.register);
              { for these opcodes the locations are exchanged before emitting }
              case op of
                A_ADD,A_OR,A_AND,A_XOR,A_IMUL:
                  location_swap(left.location,right.location);
              end;
              emit_reg_reg(op,asmsize,right.location.register,left.location.register);
            end;
        end
      else if reversed_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate on that and copy the result back into left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
          emit_reg_reg(op,asmsize,left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      else
        begin
          { cheaper encodings for some constant right operands }
          emitted:=false;
          if right.location.loc=LOC_CONSTANT then
            begin
              emitted:=true;
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.value=0) then
                emit_reg_reg(A_TEST,asmsize,left.location.register,left.location.register)
              else if (op=A_ADD) and
                      (right.location.value=1) and
                      not(overflowchecks) then
                emit_reg(A_INC,asmsize,left.location.register)
              else if (op=A_SUB) and
                      (right.location.value=1) and
                      not(overflowchecks) then
                emit_reg(A_DEC,asmsize,left.location.register)
              else if (op=A_IMUL) and
                      ispowerof2(int64(right.location.value),shiftcount) and
                      not(overflowchecks) then
                emit_const_reg(A_SHL,asmsize,shiftcount,left.location.register)
              else
                emitted:=false;
            end;

          if not emitted then
            if extra_not then
              begin
                { left and (not right), computed via a scratch register }
                scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
                emit_reg(A_NOT,asmsize,scratch);
                emit_reg_reg(A_AND,asmsize,scratch,left.location.register);
              end
            else
              emit_op_right_left(op,opsize);
        end;

      { The overflow check has to be emitted here because only this routine
        knows (through "unsigned") which flag signals the overflow. }
      if mboverflow and overflowchecks then
        begin
          current_asmdata.getjumplabel(lab_nooverflow);
          if unsigned then
            skipcond:=F_AE
          else
            skipcond:=F_NO;
          cg.a_jmp_flags(current_asmdata.CurrAsmList,skipcond,lab_nooverflow);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,lab_nooverflow);
        end;
    end;
  { Ensures that left.location is a LOC_REGISTER.

    opsize  size the operands are brought to
    noswap  when true the operand locations may not be exchanged

    If left is not in a register but right is (and swapping is allowed),
    the two locations are exchanged and nf_swapped is toggled; otherwise
    left is loaded into a register. }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if (right.location.loc=LOC_REGISTER) and not(noswap) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { a comparison leaves the register value untouched, so a
                constant register may be reused in that case }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,is_comparison);
            end;
        end;

      { non-constant operands whose unsigned size differs from opsize are
        (re)loaded into a register of the requested size }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>opsize) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>opsize) then
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
    end;
  { Prepares both operands for an x87 operation.

    force_fpureg  when true, operands that are not yet a LOC_FPUREGISTER
                  are loaded onto the fpu stack

    nf_swapped is toggled whenever left already was on the fpu stack while
    right is (or gets loaded) there as well. }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    var
      left_on_stack,
      right_on_stack : boolean;
    begin
      left_on_stack:=left.location.loc=LOC_FPUREGISTER;
      right_on_stack:=right.location.loc=LOC_FPUREGISTER;

      if right_on_stack then
        begin
          if left_on_stack then
            { fpu operands are always in the wrong order on the stack }
            toggleflag(nf_swapped)
          else if force_fpureg then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end
      else if force_fpureg then
        begin
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left_on_stack then
            { left was on the stack before right got loaded => swap }
            toggleflag(nf_swapped)
          else
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end;
    end;
  { Emits "op right,left" where left.location is a register and right may
    be a register, a memory reference or a constant. A right operand in a
    subset register/reference is loaded into a full register first. Any
    other kind of right location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
      asmsize  : topsize;
    begin
      asmsize:=TCGSize2Opsize[opsize];

      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);

      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,asmsize,right.location.register,left.location.register))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        begin
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,asmsize,right.location.reference,left.location.register));
        end
      else if right.location.loc=LOC_CONSTANT then
        begin
{$ifdef x86_64}
          { x86_64 only supports signed 32 bit constants directly, anything
            else has to go through a register }
          if (opsize in [OS_S64,OS_64]) and
             ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
            begin
              constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,asmsize,constreg,left.location.register));
            end
          else
{$endif x86_64}
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,asmsize,right.location.value,left.location.register));
        end
      else
        internalerror(200203232);
    end;
  { Returns the flags condition that corresponds to the comparison stored
    in nodetype.

    unsigned  use the below/above conditions instead of less/greater

    When nf_swapped is set the ordering comparisons are mirrored. The
    result is only assigned for equaln, unequaln, ltn, lten, gtn and gten. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { exchanged operands turn "<" into ">" and "<=" into ">=" (and back) }
      cmp:=nodetype;
      if nf_swapped in flags then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;

      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            getresflags:=F_B
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            getresflags:=F_BE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            getresflags:=F_A
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            getresflags:=F_AE
          else
            getresflags:=F_GE;
      end;
    end;
  305. {*****************************************************************************
  306. AddSmallSet
  307. *****************************************************************************}
  { Generates code for the small set operators: union/include (addn),
    symmetric difference, intersection and difference, plus the plain
    xor/or/and node types. The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase         : aint;
      opsize,
      ressize         : TCGSize;
      op              : TAsmOp;
      extra_not,
      noswap,
      swapped,
      complement_only : boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      complement_only:=false;
      ressize:=int_cgsize(resultdef.size);
      opsize:=ressize;

      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { a single element is included with BTS;
                  ranges are not supported for small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { there is no byte sized bts, use the 32 bit form }
                if opsize=OS_8 then
                  opsize:=OS_32;
                { bts needs both operands in registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and (not b) }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { right holds the minuend, left the subtrahend }
                complement_only:=(right.location.loc=LOC_CONSTANT) and
                                 (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                complement_only:=(left.location.loc=LOC_CONSTANT) and
                                 (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if complement_only then
        begin
          { a set expression [0..31]-x only needs a NOT of x }
          if nf_swapped in flags then
            begin
              { exchanging the locations again also flips the swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          left_must_be_reg(opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is a register now and holds the result }
          location:=left.location;
        end;

      { undo the size change made above for the missing byte sized bts }
      if opsize<>ressize then
        location_force_reg(current_asmdata.CurrAsmList,location,ressize,false);
    end;
  { Generates code for small set comparisons (=, <>, <=, >=); the result
    is returned in the cpu flags. Other node types raise internalerror
    2003042215. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
    begin
      pass_left_right;
      opsize:=int_cgsize(left.resultdef.size);

      if nodetype in [lten,gten] then
        begin
          { subset test: and both operands, then compare the result with
            right. The operands are exchanged for an unswapped "<=" and
            for a swapped ">=". }
          if (nodetype=lten)<>(nf_swapped in flags) then
            swapleftright;
          location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
          emit_op_right_left(A_AND,opsize);
          { warning: ugly hack, a JE is needed afterwards so the node is
            turned into an equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  438. {*****************************************************************************
  439. AddMMX
  440. *****************************************************************************}
  441. {$ifdef SUPPORT_MMX}
  { Generates code for MMX operations (add, mul, sub, xor, or, and). The
    opcode is selected from the MMX base type of the left operand and, for
    add/sub, from the saturation switch; the result is placed in an MMX
    register.

    NOTE(review): for base types that are not listed in the inner case
    statements "op" is left unassigned. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      cmpop     : boolean;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized operands need the word form of the
                      saturating subtraction, like A_PADDSW for addn }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { neither left nor right in a register? }
      { then one has to be loaded             }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc is LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right's reference that gets used below, so right
                    is the operand that has to be checked (left is always
                    an MMX register here) }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  610. {$endif SUPPORT_MMX}
  611. {*****************************************************************************
  612. AddFloat
  613. *****************************************************************************}
  { Generates code for a floating point add/mul/sub/div using scalar SSE
    instructions; the result is placed in an mm register. Other node types
    raise internalerror 200312231. }
  procedure tx86addnode.second_addfloatsse;
    var
      mmop : topcg;

    { Applies mmop with "source" as source operand to location.register.
      A source that lives in an fpu register is written to memory first:
      there is no direct fpu->mm register copy instruction, so going
      through memory probably gives the shorter code. }
    procedure apply_source(var source : tlocation);
      begin
        if source.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,source);
        cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,source,location.register,mms_movescalar);
      end;

    begin
      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { swapleftright is of no use if both operands are on the fpu
            stack, since both are "R_ST" then and nothing would change
            -> exchange them manually }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn   : mmop:=OP_ADD;
        muln   : mmop:=OP_MUL;
        subn   : mmop:=OP_SUB;
        slashn : mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      { right's register can only serve as destination if the operation is
        commutative }
      if (right.location.loc=LOC_MMREGISTER) and (mmop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=right.location.register;
          apply_source(left.location);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          apply_source(right.location);
        end;
    end;
  { Generates a floating point comparison with COMISS/COMISD; the result
    is returned in the cpu flags. Operand types other than single and
    double raise internalerror 200402222. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;

    { Emits "cmpop source,mmreg". A source in an fpu register is written to
      memory first (there is no direct fpu->mm register copy instruction);
      a source that is neither a reference nor an mm register afterwards
      raises internalerror(errcode). }
    procedure emit_compare(var source : tlocation;mmreg : tregister;errcode : longint);
      begin
        if source.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,source);

        if source.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,source.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,source.reference,mmreg));
          end
        else if source.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,source.register,mmreg))
        else
          internalerror(errcode);
      end;

    begin
      if is_single(left.resultdef) then
        cmpop:=A_COMISS
      else if is_double(left.resultdef) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right already is in an mm register: compare against it and
            record that the operands are the other way round now }
          emit_compare(left.location,right.location.register,200402221);
          if nf_swapped in flags then
            exclude(flags,nf_swapped)
          else
            include(flags,nf_swapped);
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          emit_compare(right.location,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for add/mul/sub/div on vector operands that fit into an
    mm register; the result is placed in an mm register. Other node types
    raise internalerror 200610071, operands that do not fit into an mm
    register raise internalerror 200610072. }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          { The operand def is the vector (array) type, so the float type
            has to be taken from its element def in both branches below;
            casting the array def itself to tfloatdef would read a field
            that does not belong to it. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { right's register can only serve as destination if the operation
            is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/mul/sub/div. The work is handed
    to second_addfloatsse when SSE is used for the result type; otherwise
    x87 code is emitted and the result is left in ST. Other node types
    raise internalerror 2003042214. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resultdef) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn   : fpuop:=A_FADDP;
        muln   : fpuop:=A_FMULP;
        subn   : fpuop:=A_FSUBP;
        slashn : fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { with exchanged operands the non-commutative operations need their
        reverse form }
      if nf_swapped in flags then
        case nodetype of
          slashn : fpuop:=A_FDIVRP;
          subn   : fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison; the result is returned
    in the cpu flags. The work is handed to second_cmpfloatsse when SSE is
    used for either operand type. Otherwise x87 code is emitted: on
    non-x86_64 targets with a cpu type below cpu_Pentium2 the fpu status
    word is transferred through AX/SAHF, else FCOMIP is used. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_sse(left.resultdef) or use_sse(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Both code paths use the unsigned (below/above) conditions, mirrored
        when nf_swapped is set. That is exactly the mapping implemented by
        getresflags(true), which second_cmpfloatsse uses as well, so it is
        shared here instead of being repeated per code path. }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  890. {*****************************************************************************
  891. Add64bit
  892. *****************************************************************************}
  { 64 bit arithmetic: forwarded to second_addordinal when the cpu has a
    64 bit alu; otherwise a separate implementation has to override this
    method and reaching it raises internalerror 200402042. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: forwarded to second_cmpordinal when the cpu has a
    64 bit alu; otherwise a separate implementation has to override this
    method and reaching it raises internalerror 200402043. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  911. {*****************************************************************************
  912. AddOrdinal
  913. *****************************************************************************}
  { Ordinal arithmetic. A multiplication with at least one unsigned operand
    needs the MUL opcode and is handed to second_mul; everything else is
    done by the inherited implementation. }
  procedure tx86addnode.second_addordinal;
    begin
      if (nodetype=muln) and
         not(is_signed(left.resultdef) and is_signed(right.resultdef)) then
        second_mul
      else
        inherited second_addordinal;
    end;
  { Ordinal comparison: emits a CMP (or an equivalent chosen by
    emit_generic_code) and returns the result in the cpu flags. The
    comparison is unsigned as soon as one of the operands is unsigned. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize     : tcgsize;
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpsize:=def_cgsize(left.resultdef);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: register tx86addnode as the add node class }
  begin
    caddnode:=tx86addnode;
  end.